notebook.community

Edit and run



In [1]:

    
import os, sys, email,re
import pandas as pd
import numpy as np

np.random.seed(42)

import networkx as nx

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns



In [32]:

    
df = pd.read_csv('data/enron/emails.csv', header=0)
df.head()









    Out[32]:







  
    
      
      file
      message
    
  
  
    
      0
      allen-p/_sent_mail/1.
      Message-ID: <18782981.1075855378110.JavaMail.e...
    
    
      1
      allen-p/_sent_mail/10.
      Message-ID: <15464986.1075855378456.JavaMail.e...
    
    
      2
      allen-p/_sent_mail/100.
      Message-ID: <24216240.1075855687451.JavaMail.e...
    
    
      3
      allen-p/_sent_mail/1000.
      Message-ID: <13505866.1075863688222.JavaMail.e...
    
    
      4
      allen-p/_sent_mail/1001.
      Message-ID: <30922949.1075863688243.JavaMail.e...



In [33]:

    
list(df) # show all attributes









    Out[33]:





['file', 'message']



In [34]:

    
print(df['message'][0])









    



Message-ID: <18782981.1075855378110.JavaMail.evans@thyme>
Date: Mon, 14 May 2001 16:39:00 -0700 (PDT)
From: phillip.allen@enron.com
To: tim.belden@enron.com
Subject: 
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
X-From: Phillip K Allen
X-To: Tim Belden <Tim Belden/Enron@EnronXGate>
X-cc: 
X-bcc: 
X-Folder: \Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Sent Mail
X-Origin: Allen-P
X-FileName: pallen (Non-Privileged).pst

Here is our forecast



In [35]:

    
df.shape









    Out[35]:





(517401, 2)



In [36]:

    
## Helper functions
def get_text_from_email(msg):
    '''To get the content from email objects'''
    parts = []
    for part in msg.walk():
        if part.get_content_type() == 'text/plain':
            parts.append( part.get_payload() )
    return ''.join(parts)

def split_email_addresses(line):
    '''To separate multiple email addresses'''
    if line:
        addrs = line.split(',')
        addrs = frozenset(map(lambda x: x.strip(), addrs))
    else:
        addrs = None
    return addrs



In [37]:

    
# Parse the emails into a list email objects
messages = list(map(email.message_from_string, df['message']))
df.drop('message', axis=1, inplace=True)
# Get fields from parsed email objects
keys = messages[0].keys()
for key in keys:
    df[key] = [doc[key] for doc in messages]
# Parse content from emails
df['content'] = list(map(get_text_from_email, messages))
# Split multiple email addresses
df['From'] = df['From'].map(split_email_addresses)
df['To'] = df['To'].map(split_email_addresses)

# Extract the root of 'file' as 'user'
df['user'] = df['file'].map(lambda x:x.split('/')[0])
del messages

df.head()









    Out[37]:







  
    
      
      file
      Message-ID
      Date
      From
      To
      Subject
      Mime-Version
      Content-Type
      Content-Transfer-Encoding
      X-From
      X-To
      X-cc
      X-bcc
      X-Folder
      X-Origin
      X-FileName
      content
      user
    
  
  
    
      0
      allen-p/_sent_mail/1.
      <18782981.1075855378110.JavaMail.evans@thyme>
      Mon, 14 May 2001 16:39:00 -0700 (PDT)
      (phillip.allen@enron.com)
      (tim.belden@enron.com)
      
      1.0
      text/plain; charset=us-ascii
      7bit
      Phillip K Allen
      Tim Belden <Tim Belden/Enron@EnronXGate>
      
      
      \Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Se...
      Allen-P
      pallen (Non-Privileged).pst
      Here is our forecast\n\n
      allen-p
    
    
      1
      allen-p/_sent_mail/10.
      <15464986.1075855378456.JavaMail.evans@thyme>
      Fri, 4 May 2001 13:51:00 -0700 (PDT)
      (phillip.allen@enron.com)
      (john.lavorato@enron.com)
      Re:
      1.0
      text/plain; charset=us-ascii
      7bit
      Phillip K Allen
      John J Lavorato <John J Lavorato/ENRON@enronXg...
      
      
      \Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Se...
      Allen-P
      pallen (Non-Privileged).pst
      Traveling to have a business meeting takes the...
      allen-p
    
    
      2
      allen-p/_sent_mail/100.
      <24216240.1075855687451.JavaMail.evans@thyme>
      Wed, 18 Oct 2000 03:00:00 -0700 (PDT)
      (phillip.allen@enron.com)
      (leah.arsdall@enron.com)
      Re: test
      1.0
      text/plain; charset=us-ascii
      7bit
      Phillip K Allen
      Leah Van Arsdall
      
      
      \Phillip_Allen_Dec2000\Notes Folders\'sent mail
      Allen-P
      pallen.nsf
      test successful.  way to go!!!
      allen-p
    
    
      3
      allen-p/_sent_mail/1000.
      <13505866.1075863688222.JavaMail.evans@thyme>
      Mon, 23 Oct 2000 06:13:00 -0700 (PDT)
      (phillip.allen@enron.com)
      (randall.gay@enron.com)
      
      1.0
      text/plain; charset=us-ascii
      7bit
      Phillip K Allen
      Randall L Gay
      
      
      \Phillip_Allen_Dec2000\Notes Folders\'sent mail
      Allen-P
      pallen.nsf
      Randy,\n\n Can you send me a schedule of the s...
      allen-p
    
    
      4
      allen-p/_sent_mail/1001.
      <30922949.1075863688243.JavaMail.evans@thyme>
      Thu, 31 Aug 2000 05:07:00 -0700 (PDT)
      (phillip.allen@enron.com)
      (greg.piper@enron.com)
      Re: Hello
      1.0
      text/plain; charset=us-ascii
      7bit
      Phillip K Allen
      Greg Piper
      
      
      \Phillip_Allen_Dec2000\Notes Folders\'sent mail
      Allen-P
      pallen.nsf
      Let's shoot for Tuesday at 11:45.
      allen-p

Clean Data



In [38]:

    
# Parse datetime
df['Date'] = pd.to_datetime(df['Date'], infer_datetime_format=True)
df.dtypes









    Out[38]:





file                                 object
Message-ID                           object
Date                         datetime64[ns]
From                                 object
To                                   object
Subject                              object
Mime-Version                         object
Content-Type                         object
Content-Transfer-Encoding            object
X-From                               object
X-To                                 object
X-cc                                 object
X-bcc                                object
X-Folder                             object
X-Origin                             object
X-FileName                           object
content                              object
user                                 object
dtype: object

What should be the index? Message-ID or Date timestamp



In [39]:

    
# Set index and drop columns with two few values
df = df.set_index('Message-ID')\
    .drop(['file', 'Mime-Version', 'Content-Type', 'Content-Transfer-Encoding'], axis=1)



In [40]:

    
df.head()









    Out[40]:







  
    
      
      Date
      From
      To
      Subject
      X-From
      X-To
      X-cc
      X-bcc
      X-Folder
      X-Origin
      X-FileName
      content
      user
    
    
      Message-ID
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      <18782981.1075855378110.JavaMail.evans@thyme>
      2001-05-14 23:39:00
      (phillip.allen@enron.com)
      (tim.belden@enron.com)
      
      Phillip K Allen
      Tim Belden <Tim Belden/Enron@EnronXGate>
      
      
      \Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Se...
      Allen-P
      pallen (Non-Privileged).pst
      Here is our forecast\n\n
      allen-p
    
    
      <15464986.1075855378456.JavaMail.evans@thyme>
      2001-05-04 20:51:00
      (phillip.allen@enron.com)
      (john.lavorato@enron.com)
      Re:
      Phillip K Allen
      John J Lavorato <John J Lavorato/ENRON@enronXg...
      
      
      \Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Se...
      Allen-P
      pallen (Non-Privileged).pst
      Traveling to have a business meeting takes the...
      allen-p
    
    
      <24216240.1075855687451.JavaMail.evans@thyme>
      2000-10-18 10:00:00
      (phillip.allen@enron.com)
      (leah.arsdall@enron.com)
      Re: test
      Phillip K Allen
      Leah Van Arsdall
      
      
      \Phillip_Allen_Dec2000\Notes Folders\'sent mail
      Allen-P
      pallen.nsf
      test successful.  way to go!!!
      allen-p
    
    
      <13505866.1075863688222.JavaMail.evans@thyme>
      2000-10-23 13:13:00
      (phillip.allen@enron.com)
      (randall.gay@enron.com)
      
      Phillip K Allen
      Randall L Gay
      
      
      \Phillip_Allen_Dec2000\Notes Folders\'sent mail
      Allen-P
      pallen.nsf
      Randy,\n\n Can you send me a schedule of the s...
      allen-p
    
    
      <30922949.1075863688243.JavaMail.evans@thyme>
      2000-08-31 12:07:00
      (phillip.allen@enron.com)
      (greg.piper@enron.com)
      Re: Hello
      Phillip K Allen
      Greg Piper
      
      
      \Phillip_Allen_Dec2000\Notes Folders\'sent mail
      Allen-P
      pallen.nsf
      Let's shoot for Tuesday at 11:45.
      allen-p

Exploratory Analysis

Who was sending to whom



In [4]:

    
## Save as csv
df.to_csv('data/enron/enron_cleaned.csv')









    



-----------------------------------------------
NameError     Traceback (most recent call last)
<ipython-input-4-cafb11133188> in <module>()
      1 ## Save as csv
----> 2 df.to_csv('data/enron/enron_cleaned.csv')

NameError: name 'df' is not defined



In [2]:

    
df = pd.read_csv('data/enron/enron_cleaned.csv', parse_dates=[1])
df.head()









    Out[2]:







  
    
      
      Message-ID
      Date
      From
      To
      Subject
      X-From
      X-To
      X-cc
      X-bcc
      X-Folder
      X-Origin
      X-FileName
      content
      user
    
  
  
    
      0
      <18782981.1075855378110.JavaMail.evans@thyme>
      2001-05-14 23:39:00
      frozenset({'phillip.allen@enron.com'})
      frozenset({'tim.belden@enron.com'})
      NaN
      Phillip K Allen
      Tim Belden <Tim Belden/Enron@EnronXGate>
      NaN
      NaN
      \Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Se...
      Allen-P
      pallen (Non-Privileged).pst
      Here is our forecast\n\n
      allen-p
    
    
      1
      <15464986.1075855378456.JavaMail.evans@thyme>
      2001-05-04 20:51:00
      frozenset({'phillip.allen@enron.com'})
      frozenset({'john.lavorato@enron.com'})
      Re:
      Phillip K Allen
      John J Lavorato <John J Lavorato/ENRON@enronXg...
      NaN
      NaN
      \Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Se...
      Allen-P
      pallen (Non-Privileged).pst
      Traveling to have a business meeting takes the...
      allen-p
    
    
      2
      <24216240.1075855687451.JavaMail.evans@thyme>
      2000-10-18 10:00:00
      frozenset({'phillip.allen@enron.com'})
      frozenset({'leah.arsdall@enron.com'})
      Re: test
      Phillip K Allen
      Leah Van Arsdall
      NaN
      NaN
      \Phillip_Allen_Dec2000\Notes Folders\'sent mail
      Allen-P
      pallen.nsf
      test successful.  way to go!!!
      allen-p
    
    
      3
      <13505866.1075863688222.JavaMail.evans@thyme>
      2000-10-23 13:13:00
      frozenset({'phillip.allen@enron.com'})
      frozenset({'randall.gay@enron.com'})
      NaN
      Phillip K Allen
      Randall L Gay
      NaN
      NaN
      \Phillip_Allen_Dec2000\Notes Folders\'sent mail
      Allen-P
      pallen.nsf
      Randy,\n\n Can you send me a schedule of the s...
      allen-p
    
    
      4
      <30922949.1075863688243.JavaMail.evans@thyme>
      2000-08-31 12:07:00
      frozenset({'phillip.allen@enron.com'})
      frozenset({'greg.piper@enron.com'})
      Re: Hello
      Phillip K Allen
      Greg Piper
      NaN
      NaN
      \Phillip_Allen_Dec2000\Notes Folders\'sent mail
      Allen-P
      pallen.nsf
      Let's shoot for Tuesday at 11:45.
      allen-p



In [3]:

    
df.dtypes









    Out[3]:





Message-ID            object
Date          datetime64[ns]
From                  object
To                    object
Subject               object
X-From                object
X-To                  object
X-cc                  object
X-bcc                 object
X-Folder              object
X-Origin              object
X-FileName            object
content               object
user                  object
dtype: object



In [4]:

    
len(df.From.unique()) # how many unique senders









    Out[4]:





20328



In [5]:

    
len(df.To.unique()) # how many unique senders









    Out[5]:





55420



In [6]:

    
G = nx.from_pandas_dataframe(df, 'From', 'To')



In [ ]:

    
spring_lay = nx.spring_layout(G)



In [ ]:

    
nx.draw_networkx_nodes(G, spring_lay, node_size=20)
nx.draw_networkx_edges(G, spring_lay, alpha=0.4)

Email Body Content



In [16]:

    
def clean(text):
    stop = set(stopwords.words('english'))
    stop.update(("to","cc","subject","http","from","sent","aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))
    exclude = set(string.punctuation) 
    lemma = WordNetLemmatizer()
    porter= PorterStemmer()
    
    text=text.rstrip()
    text = re.sub(r'[^a-zA-Z]', ' ', text)
    stop_free = " ".join([i for i in text.lower().split() if((i not in stop) and (not i.isdigit()))])
    punc_free = ''.join(ch for ch in stop_free if ch not in exclude)
    normalized = " ".join(lemma.lemmatize(word) for word in punc_free.split())
    #stem = " ".join(porter.stem(token) for token in normalized.split())
    
    return normalized



In [17]:

    
analysis_df = df[['Date', 'From', 'To', 'Date','content']].dropna().copy()
analysis_df.shape









    Out[17]:





(495554, 5)



In [23]:

    
analysis_df.head()









    Out[23]:







  
    
      
      Date
      From
      To
      Date
      content
    
  
  
    
      0
      Mon, 14 May 2001 16:39:00 -0700 (PDT)
      (phillip.allen@enron.com)
      (tim.belden@enron.com)
      Mon, 14 May 2001 16:39:00 -0700 (PDT)
      Here is our forecast\n\n
    
    
      1
      Fri, 4 May 2001 13:51:00 -0700 (PDT)
      (phillip.allen@enron.com)
      (john.lavorato@enron.com)
      Fri, 4 May 2001 13:51:00 -0700 (PDT)
      Traveling to have a business meeting takes the...
    
    
      2
      Wed, 18 Oct 2000 03:00:00 -0700 (PDT)
      (phillip.allen@enron.com)
      (leah.arsdall@enron.com)
      Wed, 18 Oct 2000 03:00:00 -0700 (PDT)
      test successful.  way to go!!!
    
    
      3
      Mon, 23 Oct 2000 06:13:00 -0700 (PDT)
      (phillip.allen@enron.com)
      (randall.gay@enron.com)
      Mon, 23 Oct 2000 06:13:00 -0700 (PDT)
      Randy,\n\n Can you send me a schedule of the s...
    
    
      4
      Thu, 31 Aug 2000 05:07:00 -0700 (PDT)
      (phillip.allen@enron.com)
      (greg.piper@enron.com)
      Thu, 31 Aug 2000 05:07:00 -0700 (PDT)
      Let's shoot for Tuesday at 11:45.



In [ ]:

    
fig, ax = plt.subplots(figsize=(25, 10))
sns.countplot(ax=ax, x="From", data=df)
plt.xticks(rotation=45)



In [ ]:

    
fig, ax = plt.subplots(figsize=(25, 10))
sns.countplot(ax=ax, x="To", data=df)
plt.xticks(rotation=45)



In [ ]:



In [ ]:



In [ ]:



In [29]:

    
df['Date'] = pd.to_datetime(analysis_df.Date)









    



------------------------------
ValueErrorTraceback (most recent call last)
<ipython-input-29-62ecd159cce1> in <module>()
----> 1 df['Date'] = pd.to_datetime(analysis_df.Date)

/Users/carrie/anaconda/envs/mlbook/lib/python3.5/site-packages/pandas/core/tools/datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin)
    510         result = Series(values, index=arg.index, name=arg.name)
    511     elif isinstance(arg, (ABCDataFrame, MutableMapping)):
--> 512         result = _assemble_from_unit_mappings(arg, errors=errors)
    513     elif isinstance(arg, ABCIndexClass):
    514         result = _convert_listlike(arg, box, format, name=arg.name)

/Users/carrie/anaconda/envs/mlbook/lib/python3.5/site-packages/pandas/core/tools/datetimes.py in _assemble_from_unit_mappings(arg, errors)
    567     arg = DataFrame(arg)
    568     if not arg.columns.is_unique:
--> 569         raise ValueError("cannot assemble with duplicate keys")
    570 
    571     # replace passed unit with _unit_map

ValueError: cannot assemble with duplicate keys



In [19]:

    
test = analysis_df.set_index('Date')
test.head()









    Out[19]:







  
    
      
      From
      To
      content
    
    
      Date
      
      
      
    
  
  
    
      (Mon, 14 May 2001 16:39:00 -0700 (PDT), Mon, 14 May 2001 16:39:00 -0700 (PDT))
      (phillip.allen@enron.com)
      (tim.belden@enron.com)
      Here is our forecast\n\n
    
    
      (Fri, 4 May 2001 13:51:00 -0700 (PDT), Fri, 4 May 2001 13:51:00 -0700 (PDT))
      (phillip.allen@enron.com)
      (john.lavorato@enron.com)
      Traveling to have a business meeting takes the...
    
    
      (Wed, 18 Oct 2000 03:00:00 -0700 (PDT), Wed, 18 Oct 2000 03:00:00 -0700 (PDT))
      (phillip.allen@enron.com)
      (leah.arsdall@enron.com)
      test successful.  way to go!!!
    
    
      (Mon, 23 Oct 2000 06:13:00 -0700 (PDT), Mon, 23 Oct 2000 06:13:00 -0700 (PDT))
      (phillip.allen@enron.com)
      (randall.gay@enron.com)
      Randy,\n\n Can you send me a schedule of the s...
    
    
      (Thu, 31 Aug 2000 05:07:00 -0700 (PDT), Thu, 31 Aug 2000 05:07:00 -0700 (PDT))
      (phillip.allen@enron.com)
      (greg.piper@enron.com)
      Let's shoot for Tuesday at 11:45.



In [ ]:

	file	message
0	allen-p/_sent_mail/1.	Message-ID: <18782981.1075855378110.JavaMail.e...
1	allen-p/_sent_mail/10.	Message-ID: <15464986.1075855378456.JavaMail.e...
2	allen-p/_sent_mail/100.	Message-ID: <24216240.1075855687451.JavaMail.e...
3	allen-p/_sent_mail/1000.	Message-ID: <13505866.1075863688222.JavaMail.e...
4	allen-p/_sent_mail/1001.	Message-ID: <30922949.1075863688243.JavaMail.e...

	file	Message-ID	Date	From	To	Subject	Mime-Version	Content-Type	Content-Transfer-Encoding	X-From	X-To	X-Folder	X-Origin	X-FileName	content	user
0	allen-p/_sent_mail/1.	<18782981.1075855378110.JavaMail.evans@thyme>	Mon, 14 May 2001 16:39:00 -0700 (PDT)	(phillip.allen@enron.com)	(tim.belden@enron.com)		1.0	text/plain; charset=us-ascii	7bit	Phillip K Allen	Tim Belden <Tim Belden/Enron@EnronXGate>	\Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Se...	Allen-P	pallen (Non-Privileged).pst	Here is our forecast\n\n	allen-p
1	allen-p/_sent_mail/10.	<15464986.1075855378456.JavaMail.evans@thyme>	Fri, 4 May 2001 13:51:00 -0700 (PDT)	(phillip.allen@enron.com)	(john.lavorato@enron.com)	Re:	1.0	text/plain; charset=us-ascii	7bit	Phillip K Allen	John J Lavorato <John J Lavorato/ENRON@enronXg...	\Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Se...	Allen-P	pallen (Non-Privileged).pst	Traveling to have a business meeting takes the...	allen-p
2	allen-p/_sent_mail/100.	<24216240.1075855687451.JavaMail.evans@thyme>	Wed, 18 Oct 2000 03:00:00 -0700 (PDT)	(phillip.allen@enron.com)	(leah.arsdall@enron.com)	Re: test	1.0	text/plain; charset=us-ascii	7bit	Phillip K Allen	Leah Van Arsdall	\Phillip_Allen_Dec2000\Notes Folders\'sent mail	Allen-P	pallen.nsf	test successful. way to go!!!	allen-p
3	allen-p/_sent_mail/1000.	<13505866.1075863688222.JavaMail.evans@thyme>	Mon, 23 Oct 2000 06:13:00 -0700 (PDT)	(phillip.allen@enron.com)	(randall.gay@enron.com)		1.0	text/plain; charset=us-ascii	7bit	Phillip K Allen	Randall L Gay	\Phillip_Allen_Dec2000\Notes Folders\'sent mail	Allen-P	pallen.nsf	Randy,\n\n Can you send me a schedule of the s...	allen-p
4	allen-p/_sent_mail/1001.	<30922949.1075863688243.JavaMail.evans@thyme>	Thu, 31 Aug 2000 05:07:00 -0700 (PDT)	(phillip.allen@enron.com)	(greg.piper@enron.com)	Re: Hello	1.0	text/plain; charset=us-ascii	7bit	Phillip K Allen	Greg Piper	\Phillip_Allen_Dec2000\Notes Folders\'sent mail	Allen-P	pallen.nsf	Let's shoot for Tuesday at 11:45.	allen-p

	Date	From	To	Subject	X-From	X-To	X-Folder	X-Origin	X-FileName	content	user
Message-ID
<18782981.1075855378110.JavaMail.evans@thyme>	2001-05-14 23:39:00	(phillip.allen@enron.com)	(tim.belden@enron.com)		Phillip K Allen	Tim Belden <Tim Belden/Enron@EnronXGate>	\Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Se...	Allen-P	pallen (Non-Privileged).pst	Here is our forecast\n\n	allen-p
<15464986.1075855378456.JavaMail.evans@thyme>	2001-05-04 20:51:00	(phillip.allen@enron.com)	(john.lavorato@enron.com)	Re:	Phillip K Allen	John J Lavorato <John J Lavorato/ENRON@enronXg...	\Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Se...	Allen-P	pallen (Non-Privileged).pst	Traveling to have a business meeting takes the...	allen-p
<24216240.1075855687451.JavaMail.evans@thyme>	2000-10-18 10:00:00	(phillip.allen@enron.com)	(leah.arsdall@enron.com)	Re: test	Phillip K Allen	Leah Van Arsdall	\Phillip_Allen_Dec2000\Notes Folders\'sent mail	Allen-P	pallen.nsf	test successful. way to go!!!	allen-p
<13505866.1075863688222.JavaMail.evans@thyme>	2000-10-23 13:13:00	(phillip.allen@enron.com)	(randall.gay@enron.com)		Phillip K Allen	Randall L Gay	\Phillip_Allen_Dec2000\Notes Folders\'sent mail	Allen-P	pallen.nsf	Randy,\n\n Can you send me a schedule of the s...	allen-p
<30922949.1075863688243.JavaMail.evans@thyme>	2000-08-31 12:07:00	(phillip.allen@enron.com)	(greg.piper@enron.com)	Re: Hello	Phillip K Allen	Greg Piper	\Phillip_Allen_Dec2000\Notes Folders\'sent mail	Allen-P	pallen.nsf	Let's shoot for Tuesday at 11:45.	allen-p

	Message-ID	Date	From	To	Subject	X-From	X-To	X-cc	X-bcc	X-Folder	X-Origin	X-FileName	content	user
0	<18782981.1075855378110.JavaMail.evans@thyme>	2001-05-14 23:39:00	frozenset({'phillip.allen@enron.com'})	frozenset({'tim.belden@enron.com'})	NaN	Phillip K Allen	Tim Belden <Tim Belden/Enron@EnronXGate>	NaN	NaN	\Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Se...	Allen-P	pallen (Non-Privileged).pst	Here is our forecast\n\n	allen-p
1	<15464986.1075855378456.JavaMail.evans@thyme>	2001-05-04 20:51:00	frozenset({'phillip.allen@enron.com'})	frozenset({'john.lavorato@enron.com'})	Re:	Phillip K Allen	John J Lavorato <John J Lavorato/ENRON@enronXg...	NaN	NaN	\Phillip_Allen_Jan2002_1\Allen, Phillip K.\'Se...	Allen-P	pallen (Non-Privileged).pst	Traveling to have a business meeting takes the...	allen-p
2	<24216240.1075855687451.JavaMail.evans@thyme>	2000-10-18 10:00:00	frozenset({'phillip.allen@enron.com'})	frozenset({'leah.arsdall@enron.com'})	Re: test	Phillip K Allen	Leah Van Arsdall	NaN	NaN	\Phillip_Allen_Dec2000\Notes Folders\'sent mail	Allen-P	pallen.nsf	test successful. way to go!!!	allen-p
3	<13505866.1075863688222.JavaMail.evans@thyme>	2000-10-23 13:13:00	frozenset({'phillip.allen@enron.com'})	frozenset({'randall.gay@enron.com'})	NaN	Phillip K Allen	Randall L Gay	NaN	NaN	\Phillip_Allen_Dec2000\Notes Folders\'sent mail	Allen-P	pallen.nsf	Randy,\n\n Can you send me a schedule of the s...	allen-p
4	<30922949.1075863688243.JavaMail.evans@thyme>	2000-08-31 12:07:00	frozenset({'phillip.allen@enron.com'})	frozenset({'greg.piper@enron.com'})	Re: Hello	Phillip K Allen	Greg Piper	NaN	NaN	\Phillip_Allen_Dec2000\Notes Folders\'sent mail	Allen-P	pallen.nsf	Let's shoot for Tuesday at 11:45.	allen-p

	From	To	content
Date
(Mon, 14 May 2001 16:39:00 -0700 (PDT), Mon, 14 May 2001 16:39:00 -0700 (PDT))	(phillip.allen@enron.com)	(tim.belden@enron.com)	Here is our forecast\n\n
(Fri, 4 May 2001 13:51:00 -0700 (PDT), Fri, 4 May 2001 13:51:00 -0700 (PDT))	(phillip.allen@enron.com)	(john.lavorato@enron.com)	Traveling to have a business meeting takes the...
(Wed, 18 Oct 2000 03:00:00 -0700 (PDT), Wed, 18 Oct 2000 03:00:00 -0700 (PDT))	(phillip.allen@enron.com)	(leah.arsdall@enron.com)	test successful. way to go!!!
(Mon, 23 Oct 2000 06:13:00 -0700 (PDT), Mon, 23 Oct 2000 06:13:00 -0700 (PDT))	(phillip.allen@enron.com)	(randall.gay@enron.com)	Randy,\n\n Can you send me a schedule of the s...
(Thu, 31 Aug 2000 05:07:00 -0700 (PDT), Thu, 31 Aug 2000 05:07:00 -0700 (PDT))	(phillip.allen@enron.com)	(greg.piper@enron.com)	Let's shoot for Tuesday at 11:45.