In [1]:
import cPickle as pik
import time
import dateutil.parser as p
def parseTime(t):
    """Parse a ``DD-Mon-YYYY HH:MM`` timestamp string with ``time.strptime``.

    Parameters
    ----------
    t : str
        Timestamp text such as ``"6-AUG-1983 13:40"``.

    Returns
    -------
    time.struct_time
        The broken-down time produced by ``time.strptime``.

    Raises
    ------
    ValueError
        Propagated from ``time.strptime`` when ``t`` does not match the format.
    """
    # day-of-month, abbreviated month name, 4-digit year, then 24-hour HH:MM
    stamp_format = "%d-%b-%Y %H:%M"
    parsed = time.strptime(t, stamp_format)
    return parsed
# Smoke-test parseTime on a sample timestamp; as the cell's last expression,
# the returned value is what the notebook displays.
parseTime("6-AUG-1983 13:40")
Out[1]:
In [2]:
# Sample "From:" header line, used to see which fields a split on single
# spaces produces.
st="From: MCCARTHY 6-AUG-1983 13:40 "
# strip() drops the trailing blank so the split does not end in an empty field.
st.strip().split(" ")
Out[2]:
In [3]:
# Second sample header line, this time with a sender that contains "::".
st="From: ERNIE::SHEPPERD 21-JUL-1983 20:02 "
a=st.strip().split(" ")
# The last two space-separated fields are the date and the time; rejoin them
# into one string (the same slicing parsestate applies before parsing).
a[-2]+" "+a[-1]
Out[3]:
In [31]:
def parsestate(line,state):
    """Advance the mail-header state machine by one input line.

    The line is stripped and split on tabs; the first field selects the
    transition:

    * ``"From:"`` while ``state == "reset"``  -> ``"from"``
    * ``"To:"`` (exactly two tab fields) while ``state == "from"`` -> ``"to"``
    * the long underscore separator while ``state`` is ``"to"`` or ``""``
      -> ``"reset"``
    * anything else leaves the state unchanged.

    Parameters
    ----------
    line : str
        One raw line of the input file.
    state : str
        The state before this line (``""``, ``"reset"``, ``"from"`` or ``"to"``).

    Returns
    -------
    tuple
        ``(new_state, val, ti)``.  For a ``From:`` transition ``val`` is the
        first space-separated token of the second tab field and ``ti`` is the
        result of ``p.parse`` on its last two tokens.  For a ``To:``
        transition ``val`` is the list obtained by splitting the last tab
        field on commas and ``ti`` is ``""``.  In every other case both are
        ``""``.

        A line that matches a header but cannot be parsed (missing field,
        unparseable date) is reported on stdout and yields
        ``(state, "", "")`` -- the state is left unchanged.  A 3-tuple is
        always returned so callers can unpack the result unconditionally.
    """
    a = line.strip().split("\t")
    curstate = state
    val = ""
    ti = ""
    try:
        if a[0] == "From:" and state == "reset":
            curstate = "from"
            b = a[1].strip(' \t').split(" ")
            val = b[0]
            # The date and the time are the last two space-separated tokens.
            ti = p.parse(b[-2] + " " + b[-1])
        elif a[0] == "To:" and len(a) == 2 and state == "from":
            curstate = "to"
            val = a[-1].strip(' \t').split(",")
        elif a[0] == "___________________________________________________________________________" and (state == "to" or state == ""):
            curstate = "reset"
    except (IndexError, ValueError, OverflowError):
        # IndexError: the header had no payload field or too few tokens.
        # ValueError / OverflowError: the date parser rejected the text.
        # Keep the diagnostics, but do not swallow unrelated exceptions
        # (KeyboardInterrupt, NameError, ...) and never fall through to an
        # implicit ``return None``.
        print("%s" % (a,))
        print("%s %s" % (curstate, state))
        return (state, "", "")
    return (curstate, val, ti)
# Ad-hoc check of parsestate. The signature is parsestate(line, state), so this
# call passes an empty line and uses the "To:" text as the state: no branch
# matches and the state comes back unchanged with empty val/ti.
# NOTE(review): the arguments look swapped -- confirm whether the "To:" text
# was meant to be `line`.
parsestate("","To: @DB16K ")
Out[31]:
In [34]:
# Module-level accumulator filled by process(): one [sender, recipient,
# timestamp] list per recipient of every message encountered.
data = []
def process(foo):
    """Scan one mail-log file and append its sender/recipient pairs to ``data``.

    Every line is fed to ``parsestate`` together with the current state.  A
    transition to ``"from"`` records the sender and timestamp, a transition to
    ``"to"`` appends one ``[sender, recipient, timestamp]`` record per
    recipient, and a transition to ``"reset"`` just re-arms the state machine
    for the next message.

    Parameters
    ----------
    foo : str
        Path of the text file to read.

    Returns
    -------
    None
        Results are appended to the module-level list ``data``.
    """
    src = None
    ti = None
    state = ""
    # ``with`` guarantees the file is closed even if a line fails to process.
    with open(foo, 'r') as fh:
        for l in fh:
            result = parsestate(l, state)
            if result is None:
                # parsestate could not parse this line; skip it rather than
                # crash on the tuple unpack below.
                continue
            sta, node, val = result
            if sta == state:
                # No transition happened.  parsestate echoes the current state
                # with empty payloads for non-matching lines, so acting on it
                # would overwrite the remembered sender/timestamp with "".
                continue
            if sta == "from":
                src = node
                ti = val
                state = "from"
            elif sta == "reset":
                state = "reset"
            elif sta == "to":
                state = "to"
                for x in node:
                    data.append([src, x, ti])
# Build the list of input files, one per numeric suffix 83..92.
fi = []
for k in range(83, 93):
    fi.append("/home/raj/projects/atari/data/Vax" + str(k) + ".txt")
# Feed every file through process(), announcing each one first.
for foo in fi:
    print("on " + foo)
    process(foo)
# Show how many records were collected in total.
len(data)
Out[34]:
In [39]:
# Spot-check one record: element 2 of each record is the timestamp stored by
# process(); format it as YYYY-MM-DD.
# NOTE(review): 5262 is a hard-coded index -- this raises IndexError when fewer
# records were loaded.
data[5262][2].strftime("%Y-%m-%d")
Out[39]:
In [37]:
# Number of records currently held in `data`.
len(data)
Out[37]:
In [38]:
# Persist the collected records. The ``with`` block closes (and therefore
# flushes) the output file deterministically instead of leaving that to
# garbage collection of an anonymous file object.
with open("/home/raj/projects/atari/data/mailElist.pik","wb") as pickle_file:
    pik.dump(data, pickle_file)
In [ ]: