In [1]:
import cPickle as pik
import time
import dateutil.parser as p

def parseTime(t):
    """Convert a timestamp such as "6-AUG-1983 13:40" to a time.struct_time.

    The text must be laid out as day, abbreviated month name, four-digit
    year, then hours and minutes on a 24-hour clock.  time.strptime raises
    ValueError when the text does not match that layout.
    """
    layout = "%d-%b-%Y %H:%M"
    parsed = time.strptime(t, layout)
    return parsed

# Sanity check: parse one sample timestamp and display the resulting struct_time.
parseTime("6-AUG-1983 13:40")


Out[1]:
time.struct_time(tm_year=1983, tm_mon=8, tm_mday=6, tm_hour=13, tm_min=40, tm_sec=0, tm_wday=5, tm_yday=218, tm_isdst=-1)

In [2]:
# Splitting on a single space yields an empty string for each extra space in a
# run of padding, so the number of list items depends on how wide the padding is.
st="From:   MCCARTHY        6-AUG-1983 13:40 "
st.strip().split(" ")


Out[2]:
['From:',
 '',
 '',
 'MCCARTHY',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '6-AUG-1983',
 '13:40']

In [3]:
# After stripping, the date and the time are the final two tokens, so take them
# from the end rather than counting the empty strings left by the space padding.
st="From:   ERNIE::SHEPPERD       21-JUL-1983 20:02  "
a=st.strip().split(" ")
" ".join(a[-2:])


Out[3]:
'21-JUL-1983 20:02'

In [31]:
def parsestate(line,state):
    """Advance the mail-header state machine by one input line.

    The line is stripped and split on tabs; the first field selects the
    transition.  Any line that matches none of the rules leaves the state
    unchanged.

      state "reset", first field "From:"      -> "from"
          val = first space-separated token of the second field (sender)
          ti  = dateutil parse of that field's last two tokens (date, time)
      state "from", first field "To:" and exactly two fields -> "to"
          val = last field split on "," (list of recipients)
      state "to" or "", first field is the underscore rule -> "reset"

    Args:
        line: one raw line of text from a mail file.
        state: the state reached after the previous line.

    Returns:
        A (new_state, val, ti) tuple.  val and ti are "" unless the matching
        rule above fills them in.  When a header line cannot be parsed the
        fields and states are printed and (state, "", "") is returned, i.e.
        the line is treated as a non-transition, so callers can always unpack
        three values.
    """
    a=line.strip().split("\t")
    curstate=state
    val=""
    ti=""
    try:
        if a[0] == "From:" and state=="reset":
            curstate="from"
            # Sender comes first and the date/time come last; the padding in
            # between is a run of spaces of varying width.
            b=a[1].strip(' \t').split(" ")
            val=b[0]
            ti=p.parse(b[-2]+" "+b[-1])
        elif a[0] == "To:" and len(a)==2 and state=="from":
            curstate="to"
            val=a[-1].strip(' \t').split(",")
        elif a[0]== "___________________________________________________________________________" and state in ("to",""):
            curstate="reset"

        return (curstate,val,ti)
    except Exception:
        # Same diagnostics as before, written so they print identically under
        # the Python 2 print statement and the Python 3 print function.
        print(a)
        print("%s %s" % (curstate,state))
        # Previously this path fell off the end and returned None, which made
        # the caller's three-way unpack fail with an unrelated TypeError.
        return (state,"","")
        
# NOTE(review): the arguments look swapped (empty line, header text passed as
# the state) -- confirm intent.  As written no rule matches, so the state is
# handed back unchanged with empty val/ti.
parsestate("","To:     @DB16K ")


Out[31]:
('To:     @DB16K ', '', '')

In [34]:
# Accumulates [sender, recipient, timestamp] rows across every processed file.
data=[]
def process(foo):
    """Scan the mail file `foo` and append one row per recipient to `data`.

    Each line is fed to parsestate(line, state), which answers with a
    (state, value, timestamp) triple.  A "from" transition records the sender
    and timestamp; the following "to" transition appends
    [sender, recipient, timestamp] to the module-level `data` list for every
    recipient it lists.

    Args:
        foo: path of the text file to read.

    Returns:
        None; results are appended to `data`.
    """
    src=None
    ti=None
    state=""
    # The with-block closes the file even if parsing raises part-way through.
    with open(foo,'r') as mailfile:
        for l in mailfile:
            parsed=parsestate(l,state)
            if parsed is None:
                # Guard against a parser that signals failure by returning None.
                continue
            sta,val,stamp=parsed
            if sta == "from":
                # A line that causes no transition is echoed back with the
                # current state and empty values.  Only take sender/timestamp
                # when "from" is actually entered, otherwise such a line
                # would overwrite them with "".
                if state != "from":
                    src=val
                    ti=stamp
                state="from"
            elif sta == "reset":
                state="reset"
            elif sta == "to":
                state="to"
                # val is "" for a non-transition line, so nothing is appended.
                for x in val:
                    data.append([src,x,ti])


# Input files Vax83.txt through Vax92.txt.
fi = []
for k in range(83, 93):
    fi.append("/home/raj/projects/atari/data/Vax"+str(k)+".txt")

# Feed each file through process(); rows accumulate in the global `data`.
for foo in fi:
    print("on "+foo)
    process(foo)
len(data)


on /home/raj/projects/atari/data/Vax83.txt
on /home/raj/projects/atari/data/Vax84.txt
on /home/raj/projects/atari/data/Vax85.txt
on /home/raj/projects/atari/data/Vax86.txt
on /home/raj/projects/atari/data/Vax87.txt
on /home/raj/projects/atari/data/Vax88.txt
on /home/raj/projects/atari/data/Vax89.txt
on /home/raj/projects/atari/data/Vax90.txt
on /home/raj/projects/atari/data/Vax91.txt
on /home/raj/projects/atari/data/Vax92.txt
Out[34]:
5887

In [39]:
# Spot check: the third field of a row supports strftime, i.e. it is a parsed
# date/time rather than raw text.
data[5262][2].strftime("%Y-%m-%d")


Out[39]:
'1992-03-16'

In [37]:
# Number of [sender, recipient, timestamp] rows collected.
len(data)


Out[37]:
5887

In [38]:
# Persist the collected rows.  The with-block guarantees the output file is
# flushed and closed; the original passed an anonymous open(...) that was
# never closed explicitly.
with open("/home/raj/projects/atari/data/mailElist.pik","wb") as out:
    pik.dump(data,out)

In [ ]: