In [1]:
import sys
import os
import gzip
from collections import namedtuple

In [ ]:
def openFile(name, mode):
    """Open ``name``, transparently using gzip for ``.gz`` files.

    Parameters
    ----------
    name : str
        Path of the file. A case-insensitive ``.gz`` suffix selects gzip.
    mode : str
        Mode string such as ``'r'`` or ``'w'``. For gzip files the binary
        flag is added when absent; for every other file ``mode`` is passed
        to the builtin ``open`` unchanged.

    Returns
    -------
    file object
        The result of ``gzip.open`` for ``.gz`` names, otherwise the result
        of the builtin ``open``.
    """
    if name.lower().endswith('.gz'):
        # Only append the binary flag when it is missing: blindly appending
        # it turned 'rb' into 'rbb', which Python 3 rejects as an invalid mode.
        gzMode = mode if 'b' in mode else mode + 'b'
        return gzip.open(name, gzMode)
    return open(name, mode)

In [2]:
# One parsed input record: `ts` is the timestamp field and `cluster` the
# value that gets written to the output files.
Entry = namedtuple('Entry', 'ts cluster')
# One output window: `start`/`end` are its bounds and `fid` is the open
# file object that receives the entries falling inside it.
Writer = namedtuple('Writer', 'start end fid')

In [3]:
def makeCounters(fPrefix,start,length,overlap,num):
    """Create the writers for ``num`` consecutive windows of width ``length``.

    Window ``k`` begins at ``float(start) + length*k`` and ends ``length``
    later. When ``overlap`` is non-zero, every window is immediately followed
    by a second one shifted forward by ``length*overlap``. Each writer's file
    is named ``<fPrefix><int(begin)>-<int(end)>.out`` and is opened for
    writing as soon as the writer is built.

    Returns the list of ``Writer`` tuples in creation order.
    """
    def _openWindow(lo, hi):
        # Build one Writer together with its freshly opened output file.
        return Writer(lo, hi, openFile('%s%i-%i.out' % (fPrefix, int(lo), int(hi)), 'w'))

    writers = []
    for idx in range(num):
        lo = float(start) + length*idx
        hi = lo + length
        shiftedLo = lo + length*overlap
        shiftedHi = shiftedLo + length

        writers.append(_openWindow(lo, hi))
        if overlap != 0:
            writers.append(_openWindow(shiftedLo, shiftedHi))

    return writers

In [4]:
def delZero(path,prefix):
    """Delete zero-length files in ``path`` whose names start with ``prefix``.

    Parameters
    ----------
    path : str
        Directory to scan (not recursive).
    prefix : str
        Only entries whose name starts with this string are considered.

    Directories and non-empty files are left untouched.
    """
    for name in os.listdir(path):
        if not name.startswith(prefix):
            continue
        # os.listdir returns bare names; join with `path` so the check and
        # the removal do not depend on the current working directory.
        fullName = os.path.join(path, name)
        if os.path.isfile(fullName) and os.path.getsize(fullName) == 0:
            os.remove(fullName)

In [5]:
def putLogLine(entry,writers):
    """Write ``entry.cluster`` to every writer whose window contains ``entry.ts``.

    Parameters
    ----------
    entry : object with ``ts`` and ``cluster`` attributes
        ``ts`` must be convertible with ``float``.
    writers : iterable of objects with ``start``, ``end`` and ``fid``
        ``fid`` is the open file object for that window.

    Both window bounds are inclusive. A writer whose ``end`` lies before the
    entry's timestamp is closed. If no writer accepts the entry a warning
    line is printed.

    Returns
    -------
    bool
        ``True`` when every writer's file is closed after this call (the
        caller then needs a fresh set of writers), ``False`` otherwise.
    """
    # Parse the timestamp once instead of once per writer.
    ts = float(entry.ts)
    placed = False
    for w in writers:
        if float(w.start) <= ts <= float(w.end):
            w.fid.write('%s ' % (entry.cluster))
            placed = True
        elif ts > float(w.end) and not w.fid.closed:
            # This window is entirely before the entry: close it.
            w.fid.close()

    if not placed:
        # Single-argument print() emits the same text on Python 2 and 3.
        print('BAD BAD BAD BAD')

    return all(w.fid.closed for w in writers)

In [6]:
def writeThings(f,fPrefix,length,overlap):
    """Distribute the records read from ``f`` into per-window output files.

    Parameters
    ----------
    f : iterable of lines
        Each non-blank line must contain at least three comma-separated
        fields; the first is used as the timestamp and the second as the
        cluster, the remainder is ignored.
    fPrefix : str
        Prefix of the output file names (see ``makeCounters``).
    length, overlap :
        Window width and shift fraction, passed through to ``makeCounters``.

    Writers are created in batches starting at the first record's timestamp.
    Whenever ``putLogLine`` reports that every writer is closed, a new batch
    is created starting at the current record's timestamp and the record is
    written again. The number of records processed is printed at the end.

    Raises
    ------
    ValueError
        If a non-blank line has fewer than three comma-separated fields.
    """
    writersPerBatch = 100
    writers = list()
    read = 0
    try:
        # Iterate lazily rather than loading the whole input with readlines().
        for current in f:
            if not current.strip():
                # A blank (e.g. trailing) line carries no record.
                continue
            ts, cluster, _rest = current.split(',',2)
            entry = Entry(ts, cluster)
            if not writers:
                # First record: anchor the first batch of windows at its timestamp.
                writers = makeCounters(fPrefix,entry.ts,length,overlap,writersPerBatch)
            moreNeeded = putLogLine(entry,writers)
            read += 1
            if moreNeeded:
                print('moar writers')
                writers = makeCounters(fPrefix,entry.ts,length,overlap,writersPerBatch)
                putLogLine(entry,writers)
    finally:
        # Release the output files even when a line fails to parse.
        for w in writers:
            w.fid.close()

    # Single-argument print() emits the same text on Python 2 and 3.
    print('read: %s' % read)

In [7]:
# Filename prefix of every output file written by writeThings; delZero uses
# the same prefix to find them again.
prefix = 'tbirdBig'

In [8]:
# Directory holding the input log and receiving the output files. Set the
# TBIRD_DATA_DIR environment variable to run on another machine; the
# original location remains the default.
dataDir = os.environ.get('TBIRD_DATA_DIR', '/Users/dgrossman/data')

In [9]:
# Input log consumed by writeThings: each line is split on commas and the
# first two fields are used as timestamp and cluster.
inputFile = 'tbird.log.preProc.200.supports.out'

In [10]:
# Width of each output window. makeCounters adds it directly to the first
# timestamp, so it is in the same unit as the log timestamps
# (presumably seconds, i.e. two hours -- TODO confirm).
length=2*60*60
# Shift of the optional second window, as a fraction of `length`;
# 0 means no shifted windows are created.
overlap=0

In [11]:
# Work inside the data directory: the input file name and the output file
# prefix are both relative, so they resolve against the current directory.
os.chdir(dataDir)

In [12]:
# Open the input inside a `with` block so its handle is closed once the
# split is done (or if it fails part-way).
with openFile(inputFile,'r') as logFile:
    writeThings(logFile,prefix,length,overlap)

In [13]:
# Clean up: delete the output files with this prefix that ended up empty.
delZero(dataDir,prefix)

In [ ]: