In [1]:
import sys
import os
import gzip
from collections import namedtuple
In [ ]:
def openFile(name, mode):
    """Open ``name`` with the given mode, going through gzip for ``.gz`` names.

    Args:
        name: Path of the file to open. A name ending in ``.gz``
            (case-insensitive) is opened with ``gzip.open``; any other name
            is opened with the built-in ``open``.
        mode: Mode string such as ``'r'`` or ``'w'``.

    Returns:
        The opened file object. Gzip files are always opened with a binary
        mode (``'b'`` is added when the caller did not supply it).
    """
    if name.lower().endswith('.gz'):
        # Only append 'b' when it is missing: a doubled flag such as 'rbb'
        # is rejected with ValueError by Python 3's open().
        gzMode = mode if 'b' in mode else mode + 'b'
        return gzip.open(name, gzMode)
    return open(name, mode)
In [2]:
# Entry: one parsed input line -- the ``ts`` field that is compared against
# window bounds and the ``cluster`` field that gets written out.
Entry = namedtuple('Entry', 'ts cluster')
# Writer: one output window -- its ``start``/``end`` bounds and the file
# object (``fid``) that receives the entries falling inside it.
Writer = namedtuple('Writer', 'start end fid')
In [3]:
def makeCounters(fPrefix,start,length,overlap,num):
    """Build ``num`` back-to-back windows, each with a freshly opened output file.

    Window ``i`` covers ``float(start) + length*i`` up to that value plus
    ``length``. When ``overlap`` is non-zero, a second window shifted by
    ``length*overlap`` is created right after each primary window.

    Every output file is named ``<fPrefix><int(start)>-<int(end)>.out`` and is
    opened with mode ``'w'`` via ``openFile`` as soon as its window is created.

    Args:
        fPrefix: Prefix placed in front of every output file name.
        start: Lower bound of the first window (converted with ``float``).
        length: Width of each window.
        overlap: Shift of the secondary windows as a fraction of ``length``;
            ``0`` disables the secondary windows.
        num: Number of primary windows to create.

    Returns:
        A list of ``Writer`` tuples in creation order.
    """
    nameTemplate = '%s%i-%i.out'
    origin = float(start)
    windows = []
    for idx in range(num):
        lower = origin + length*idx
        upper = lower + length
        shiftedLower = lower + length*overlap
        shiftedUpper = shiftedLower + length
        primaryName = nameTemplate % (fPrefix, int(lower), int(upper))
        windows.append(Writer(lower, upper, openFile(primaryName, 'w')))
        if overlap != 0:
            shiftedName = nameTemplate % (fPrefix, int(shiftedLower), int(shiftedUpper))
            windows.append(Writer(shiftedLower, shiftedUpper, openFile(shiftedName, 'w')))
    return windows
In [4]:
def delZero(path,prefix):
    """Delete zero-byte files in ``path`` whose names start with ``prefix``.

    Args:
        path: Directory to scan (not recursive).
        prefix: Only entries whose name starts with this string are considered.

    Returns:
        None. Matching empty regular files are removed from disk; directories
        and non-empty files are left alone.
    """
    for fileName in os.listdir(path):
        if not fileName.startswith(prefix):
            continue
        # os.listdir yields bare names; join them with ``path`` so the size
        # check and the removal do not depend on the current working directory.
        fullPath = os.path.join(path, fileName)
        if os.path.isfile(fullPath) and os.path.getsize(fullPath) == 0:
            os.remove(fullPath)
In [5]:
def putLogLine(entry,writers):
    """Write ``entry.cluster`` to every open window containing ``entry.ts``.

    A window contains the entry when ``start <= ts <= end`` (both bounds
    inclusive). Windows that end before ``ts`` are closed along the way.

    Args:
        entry: Object with ``ts`` and ``cluster`` attributes; ``ts`` is
            converted with ``float``.
        writers: Iterable of objects with ``start``, ``end`` and ``fid``
            attributes, where ``fid`` offers ``write``, ``close`` and ``closed``.

    Returns:
        True when every writer's file is closed after this call (the caller
        needs a new batch of windows), False otherwise.

    Raises:
        ValueError: If ``entry.ts`` or a window bound cannot be converted to float.

    Notes:
        Prints ``BAD BAD BAD BAD`` when the entry was not written anywhere.
        That includes the case where the entry lies past every window, and
        the case where its window has already been closed (an out-of-order
        entry); the latter is skipped rather than written to a closed file.
    """
    # The timestamp does not change per writer, so convert it once.
    ts = float(entry.ts)
    putSomewhere = False
    for w in writers:
        lower = float(w.start)
        upper = float(w.end)
        if lower <= ts <= upper:
            # A window that was closed earlier cannot accept late entries;
            # writing to it would raise ValueError, so leave it unplaced.
            if not w.fid.closed:
                w.fid.write('%s ' % (entry.cluster))
                putSomewhere = True
        elif ts > upper and not w.fid.closed:
            w.fid.close()
    if not putSomewhere:
        # Parenthesised single argument prints identically on Python 2 and 3.
        print('BAD BAD BAD BAD')
    return all(w.fid.closed for w in writers)
In [6]:
def writeThings(f,fPrefix,length,overlap,numWriters=100):
    """Distribute the lines of ``f`` into per-window output files.

    Each non-blank line is split on the first two commas; the first field
    becomes the entry's ``ts`` and the second its ``cluster``. The first entry
    fixes the start of the first batch of windows. Whenever ``putLogLine``
    reports that every window is closed, a new batch starting at the current
    entry's ``ts`` is created and the entry is written into it.

    Args:
        f: Iterable of text lines (typically an open file). It is not closed
            here; the caller owns it.
        fPrefix: Prefix for the output file names (passed to ``makeCounters``).
        length: Window width (passed to ``makeCounters``).
        overlap: Overlap fraction (passed to ``makeCounters``).
        numWriters: Number of windows created per batch. Defaults to 100.

    Returns:
        None. Prints ``read: <count>`` with the number of entries processed.

    Raises:
        ValueError: If a non-blank line has fewer than three comma-separated
            fields, or a timestamp is not numeric.
    """
    initialized = False
    writers = []
    read = 0
    try:
        # Iterate the handle lazily; readlines() would hold the entire input
        # in memory at once.
        for current in f:
            # A blank line (e.g. a trailing newline) carries no entry.
            if not current.strip():
                continue
            ts, cluster, _rest = current.split(',',2)
            entry = Entry(ts, cluster)
            if not initialized:
                initialized = True
                writers = makeCounters(fPrefix,entry.ts,length,overlap,numWriters)
            moreNeeded = putLogLine(entry,writers)
            read += 1
            if moreNeeded:
                # All windows are exhausted: start a fresh batch at this entry.
                print('moar writers')
                writers = makeCounters(fPrefix,entry.ts,length,overlap,numWriters)
                putLogLine(entry,writers)
    finally:
        # Close the current batch even when a malformed line aborts the loop,
        # so the open output files are not leaked.
        for w in writers:
            w.fid.close()
    # Same text as the Python 2 statement form, valid on Python 3 as well.
    print('read: %s' % read)
In [7]:
# Prefix for every output file name; the same prefix is passed to delZero
# in the last cell to select files for cleanup.
prefix = 'tbirdBig'
In [8]:
# NOTE(review): absolute, user-specific path -- the notebook only runs as-is
# on a machine that has this directory.
dataDir = '/Users/dgrossman/data'
In [9]:
# Name of the input file; it is opened as a relative name after the chdir below.
inputFile = 'tbird.log.preProc.200.supports.out'
In [10]:
# Window width handed to writeThings: 2*60*60 = 7200
# (presumably seconds, i.e. two hours -- confirm the timestamp units).
length=2*60*60
# overlap=0 means makeCounters creates no shifted secondary windows.
overlap=0
In [11]:
# From here on, relative file names resolve against dataDir.
os.chdir(dataDir)
In [12]:
# Keep a reference to the input handle so it is closed even when processing
# fails; try/finally is used because it works for both plain and gzip handles.
inputHandle = openFile(inputFile,'r')
try:
    writeThings(inputHandle,prefix,length,overlap)
finally:
    inputHandle.close()
In [13]:
# Remove zero-byte files whose names start with prefix (e.g. windows that
# were opened but never received an entry).
delZero(dataDir,prefix)
In [ ]: