In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
import datetime

%matplotlib inline

In [4]:
df = pd.read_csv('~/Downloads/boomtrainsubs.csv',parse_dates=True)

In [5]:
df.dtypes


Out[5]:
UID                                                    object
nudgespot::emails                                      object
nudgespot::phones                                     float64
app                                                    object
bsin                                                   object
cc                                                     object
cc.1                                                  float64
clean_campaign_id                                      object
clean_campaign_title                                   object
clean_time                                             object
cleaned                                                object
confirm_ip                                             object
confirm_ip.1                                          float64
confirm_time                                           object
created_at                                             object
custom_field_1                                         object
custom_field_2                                         object
daily_newsletter                                       object
datavalidationgrade                                    object
dstoff                                                float64
dstoff.1                                              float64
email_type                                             object
euid                                                   object
first_name                                             object
gender                                                 object
gmtoff                                                float64
gmtoff.1                                              float64
grade_b                                                object
href                                                   object
ip_warmup_1                                            object
                                                       ...   
ns_operating_system                                    object
ns_region                                              object
ns_timezone                                            object
obscura_day_attendees_-_all_years                      object
optin_ip                                               object
optin_ip.1                                            float64
optin_time                                             object
optin_time.1                                          float64
region                                                 object
region.1                                              float64
regional_event_announcement_lists                      object
remaining_2015                                         object
seed_list                                              object
sept_sweeps_(fatherly,_good,_digg,_digital_trends)     object
session                                                object
signed_up_at                                           object
source                                                 object
timezone                                               object
timezone.1                                            float64
type                                                   object
umbrella_sweeps_group                                  object
unsub_campaign_id                                      object
unsub_campaign_title                                   object
unsub_reason                                           object
unsub_reason_other                                     object
unsub_time                                             object
unsubscribed                                           object
userid                                                 object
utm_medium                                             object
utm_source                                            float64
dtype: object

In [6]:
df.head(10)


Out[6]:
UID nudgespot::emails nudgespot::phones app bsin cc cc.1 clean_campaign_id clean_campaign_title clean_time ... umbrella_sweeps_group unsub_campaign_id unsub_campaign_title unsub_reason unsub_reason_other unsub_time unsubscribed userid utm_medium utm_source
0 david@boomtrain.com david@boomtrain.com NaN NaN NaN NaN NaN NaN NaN NaN ... NaN afcced40e8 Winner NaN NaN 2016-05-20 10:58:41 True NaN NaN NaN
1 mike@atlasobscura.com mike@atlasobscura.com NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN False NaN NaN NaN
2 daniel.sobo@gmail.com daniel.sobo@gmail.com NaN NaN NaN US NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN False NaN NaN NaN
3 vanityplate@gmail.com vanityplate@gmail.com NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN False NaN NaN NaN
4 banyasegol@gmail.com banyasegol@gmail.com NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN False NaN NaN NaN
5 sillyboop10@gmail.com sillyboop10@gmail.com NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN False NaN NaN NaN
6 brett.iredell@gmail.com brett.iredell@gmail.com NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN False NaN NaN NaN
7 max.silvers@yahoo.com max.silvers@yahoo.com NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN False NaN NaN NaN
8 rachel.doyle@atlasobscura.com rachel.doyle@atlasobscura.com NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN False NaN NaN NaN
9 anika.j.burgess@gmail.com anika.j.burgess@gmail.com NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN False NaN NaN NaN

10 rows × 96 columns


In [7]:
df.ix(0,'unsub_time')


Out[7]:
<pandas.core.indexing._IXIndexer at 0x137e68390>

In [13]:
df.ix[0,'unsub_time']


Out[13]:
'2016-05-20 10:58:41'

In [39]:
def split_date(x):
    return x.split(' ')[0]

In [40]:
df.unsub_time = df.unsub_time.apply(split_date)


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-40-6e87c3a20f25> in <module>()
----> 1 df.unsub_time = df.unsub_time.apply(split_date)

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in apply(self, func, convert_dtype, args, **kwds)
   2235             values = lib.map_infer(values, boxer)
   2236 
-> 2237         mapped = lib.map_infer(values, f, convert=convert_dtype)
   2238         if len(mapped) and isinstance(mapped[0], Series):
   2239             from pandas.core.frame import DataFrame

pandas/src/inference.pyx in pandas.lib.map_infer (pandas/lib.c:63043)()

<ipython-input-39-0574d5c0e6e1> in split_date(x)
      1 def split_date(x):
----> 2     return x.split(' ')[0]

AttributeError: 'float' object has no attribute 'split'

In [30]:


In [32]:



Out[32]:
UID nudgespot::emails nudgespot::phones app bsin cc cc.1 clean_campaign_id clean_campaign_title clean_time ... umbrella_sweeps_group unsub_campaign_id unsub_campaign_title unsub_reason unsub_reason_other unsub_time unsubscribed userid utm_medium utm_source
0 david@boomtrain.com david@boomtrain.com NaN NaN NaN NaN NaN NaN NaN NaN ... NaN afcced40e8 Winner NaN NaN 2016-05-20 10:58:41 True NaN NaN NaN

1 rows × 96 columns


In [41]:



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-41-1161e8abe49d> in <module>()
----> 1 unsub_df.unsub_time = unsub_df.unsub_time.apply(split_date)

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in apply(self, func, convert_dtype, args, **kwds)
   2235             values = lib.map_infer(values, boxer)
   2236 
-> 2237         mapped = lib.map_infer(values, f, convert=convert_dtype)
   2238         if len(mapped) and isinstance(mapped[0], Series):
   2239             from pandas.core.frame import DataFrame

pandas/src/inference.pyx in pandas.lib.map_infer (pandas/lib.c:63043)()

<ipython-input-39-0574d5c0e6e1> in split_date(x)
      1 def split_date(x):
----> 2     return x.split(' ')[0]

AttributeError: 'float' object has no attribute 'split'

In [38]:
unsub_df.ix[0,'unsub_time'].split(' ')[0]


Out[38]:
'2016-05-20'

In [75]:
unsub_df = df.dropna(subset=['unsub_time'])

In [76]:
unsub_df.unsub_time = unsub_df.unsub_time.apply(split_date)

In [77]:
unsub_df.columns


Out[77]:
Index([u'UID', u'nudgespot::emails', u'nudgespot::phones', u'app', u'bsin',
       u'cc', u'cc.1', u'clean_campaign_id', u'clean_campaign_title',
       u'clean_time', u'cleaned', u'confirm_ip', u'confirm_ip.1',
       u'confirm_time', u'created_at', u'custom_field_1', u'custom_field_2',
       u'daily_newsletter', u'datavalidationgrade', u'dstoff', u'dstoff.1',
       u'email_type', u'euid', u'first_name', u'gender', u'gmtoff',
       u'gmtoff.1', u'grade_b', u'href', u'ip_warmup_1', u'ip_warmup_2',
       u'ip_warmup_3', u'ip_warmup_4', u'ip_warmup_5', u'ip_warmup_6',
       u'july_sweeps_(upout,_joyus,_dailysecret)', u'last_changed',
       u'last_changed.1', u'last_clicked', u'last_contact', u'last_heard_from',
       u'last_name', u'last_opened', u'last_seen', u'last_updated',
       u'latitude', u'latitude.1', u'leid', u'longitude', u'longitude.1',
       u'marco', u'mc_user_rating', u'member_rating', u'member_rating.1',
       u'membership_type', u'name', u'newsletters', u'notes', u'ns_browser',
       u'ns_browser_version', u'ns_city', u'ns_country', u'ns_device_type',
       u'ns_ip_address', u'ns_latitude', u'ns_longitude',
       u'ns_operating_system', u'ns_region', u'ns_timezone',
       u'obscura_day_attendees_-_all_years', u'optin_ip', u'optin_ip.1',
       u'optin_time', u'optin_time.1', u'region', u'region.1',
       u'regional_event_announcement_lists', u'remaining_2015', u'seed_list',
       u'sept_sweeps_(fatherly,_good,_digg,_digital_trends)', u'session',
       u'signed_up_at', u'source', u'timezone', u'timezone.1', u'type',
       u'umbrella_sweeps_group', u'unsub_campaign_id', u'unsub_campaign_title',
       u'unsub_reason', u'unsub_reason_other', u'unsub_time', u'unsubscribed',
       u'userid', u'utm_medium', u'utm_source'],
      dtype='object')

In [78]:
unsub_df[['confirm_time','optin_time','optin_time.1','signed_up_at']]


Out[78]:
confirm_time optin_time optin_time.1 signed_up_at
0 NaN NaN NaN 2016-05-12T11:53:50Z
15 NaN NaN NaN 2014-05-27T17:36:17Z
30 NaN NaN NaN 2015-07-13T15:06:17Z
34 NaN NaN NaN 2014-05-27T17:23:38Z
35 NaN NaN NaN 2014-05-27T17:23:35Z
84 NaN NaN NaN 2014-05-27T17:33:06Z
91 NaN NaN NaN 2014-05-27T17:34:31Z
106 NaN NaN NaN 2015-09-14T20:09:53Z
116 3/21/12 13:39 NaN NaN 2012-10-02T22:09:22Z
148 NaN NaN NaN 2015-09-19T04:59:55Z
186 NaN NaN NaN 2016-04-22T05:04:41Z
222 NaN NaN NaN 2015-11-29T16:55:10Z
227 NaN NaN NaN 2016-02-17T08:06:43Z
231 NaN NaN NaN 2015-09-09T17:43:39Z
235 NaN NaN NaN 2015-11-25T18:11:42Z
247 NaN NaN NaN 2013-11-14T20:01:01Z
254 NaN NaN NaN 2016-04-22T09:39:21Z
277 NaN NaN NaN 2015-01-12T21:54:29Z
295 NaN NaN NaN 2013-11-14T20:03:36Z
345 NaN NaN NaN 2016-04-22T13:18:46Z
350 NaN NaN NaN 2014-05-27T17:34:43Z
366 NaN NaN NaN 2016-02-22T23:46:48Z
376 NaN NaN NaN 2015-09-21T11:50:47Z
387 NaN NaN NaN 2015-09-30T21:58:23Z
420 NaN NaN NaN 2013-11-14T20:02:15Z
433 NaN NaN NaN 2015-03-24T18:05:26Z
438 NaN NaN NaN 2016-04-17T15:34:29Z
444 NaN NaN NaN 2016-04-22T17:36:22Z
495 NaN NaN NaN 2014-05-21T20:46:54Z
500 NaN NaN NaN 2016-04-22T21:14:02Z
... ... ... ... ...
457193 NaN NaN NaN 2016-05-16T14:39:23Z
457194 NaN NaN NaN 2016-05-16T14:37:55Z
457195 NaN NaN NaN 2016-05-16T14:39:10Z
457196 NaN NaN NaN 2016-05-16T14:38:14Z
457197 NaN NaN NaN 2016-05-16T14:38:31Z
457198 NaN NaN NaN 2016-05-16T14:38:25Z
457199 NaN NaN NaN 2016-05-16T14:38:56Z
457200 NaN NaN NaN 2016-05-16T14:38:53Z
457201 NaN NaN NaN 2016-05-11T12:37:11Z
457202 NaN NaN NaN 2016-05-16T14:38:04Z
457203 NaN NaN NaN 2016-05-11T12:36:43Z
457204 NaN NaN NaN 2016-05-16T14:38:20Z
457205 NaN NaN NaN 2016-05-16T14:39:35Z
457206 NaN NaN NaN 2016-05-16T14:39:27Z
457207 NaN NaN NaN 2016-05-16T14:37:55Z
457208 NaN NaN NaN 2016-05-16T14:38:37Z
457209 NaN NaN NaN 2016-05-11T12:36:27Z
457210 NaN NaN NaN 2016-05-16T14:38:23Z
457211 NaN NaN NaN 2016-05-16T14:38:33Z
457212 NaN NaN NaN 2016-05-16T14:38:18Z
457213 NaN NaN NaN 2016-05-16T14:39:00Z
457214 NaN NaN NaN 2016-05-11T12:34:57Z
457215 NaN NaN NaN 2016-05-16T14:39:09Z
457216 NaN NaN NaN 2016-05-16T14:38:16Z
457217 NaN NaN NaN 2016-05-16T14:38:26Z
457218 NaN NaN NaN 2016-05-16T14:38:12Z
457219 NaN NaN NaN 2016-05-16T14:37:53Z
457220 NaN NaN NaN 2016-05-11T12:35:26Z
457221 NaN NaN NaN 2016-05-16T14:39:31Z
457228 NaN NaN NaN 2016-05-16T14:39:12Z

96809 rows × 4 columns


In [79]:
unsub_df.signed_up_at.isnull().sum()


Out[79]:
18

In [80]:
unsub_df = unsub_df.dropna(subset=['signed_up_at'])

In [81]:
unsub_df.signed_up_at = unsub_df.signed_up_at.apply(lambda x: x.split('T')[0])

In [82]:
unsub_df.signed_up_at = pd.Series([pd.to_datetime(date) for date in unsub_df.signed_up_at])

In [83]:
unsub_df.unsub_time = pd.Series([pd.to_datetime(date) for date in unsub_df.unsub_time])

In [84]:
unsub_df['duration_subscribed'] = unsub_df.unsub_time - unsub_df.signed_up_at

In [85]:
unsub_df['duration_subscribed'].describe()


Out[85]:
count                       84779
mean     117 days 20:24:57.936989
std      206 days 06:01:30.738034
min           -267 days +00:00:00
25%               9 days 00:00:00
50%              36 days 00:00:00
75%             127 days 00:00:00
max            1609 days 00:00:00
Name: duration_subscribed, dtype: object

In [87]:
unsub_df.columns


Out[87]:
Index([u'UID', u'nudgespot::emails', u'nudgespot::phones', u'app', u'bsin',
       u'cc', u'cc.1', u'clean_campaign_id', u'clean_campaign_title',
       u'clean_time', u'cleaned', u'confirm_ip', u'confirm_ip.1',
       u'confirm_time', u'created_at', u'custom_field_1', u'custom_field_2',
       u'daily_newsletter', u'datavalidationgrade', u'dstoff', u'dstoff.1',
       u'email_type', u'euid', u'first_name', u'gender', u'gmtoff',
       u'gmtoff.1', u'grade_b', u'href', u'ip_warmup_1', u'ip_warmup_2',
       u'ip_warmup_3', u'ip_warmup_4', u'ip_warmup_5', u'ip_warmup_6',
       u'july_sweeps_(upout,_joyus,_dailysecret)', u'last_changed',
       u'last_changed.1', u'last_clicked', u'last_contact', u'last_heard_from',
       u'last_name', u'last_opened', u'last_seen', u'last_updated',
       u'latitude', u'latitude.1', u'leid', u'longitude', u'longitude.1',
       u'marco', u'mc_user_rating', u'member_rating', u'member_rating.1',
       u'membership_type', u'name', u'newsletters', u'notes', u'ns_browser',
       u'ns_browser_version', u'ns_city', u'ns_country', u'ns_device_type',
       u'ns_ip_address', u'ns_latitude', u'ns_longitude',
       u'ns_operating_system', u'ns_region', u'ns_timezone',
       u'obscura_day_attendees_-_all_years', u'optin_ip', u'optin_ip.1',
       u'optin_time', u'optin_time.1', u'region', u'region.1',
       u'regional_event_announcement_lists', u'remaining_2015', u'seed_list',
       u'sept_sweeps_(fatherly,_good,_digg,_digital_trends)', u'session',
       u'signed_up_at', u'source', u'timezone', u'timezone.1', u'type',
       u'umbrella_sweeps_group', u'unsub_campaign_id', u'unsub_campaign_title',
       u'unsub_reason', u'unsub_reason_other', u'unsub_time', u'unsubscribed',
       u'userid', u'utm_medium', u'utm_source', u'duration_subscribed'],
      dtype='object')

In [105]:
analysis = unsub_df[['UID','duration_subscribed','marco','umbrella_sweeps_group',
                    'sept_sweeps_(fatherly,_good,_digg,_digital_trends)','july_sweeps_(upout,_joyus,_dailysecret)',
                     'mc_user_rating','unsub_time','signed_up_at']]

In [106]:
analysis.head(1)


Out[106]:
UID duration_subscribed marco umbrella_sweeps_group sept_sweeps_(fatherly,_good,_digg,_digital_trends) july_sweeps_(upout,_joyus,_dailysecret) mc_user_rating unsub_time signed_up_at
0 david@boomtrain.com 8 days NaN NaN NaN NaN 3.0 2016-05-20 2016-05-12

In [118]:
marco = analysis.dropna(subset=['marco'])

In [108]:
umbrella = analysis.dropna(subset=['umbrella_sweeps_group'])

In [109]:
sept = analysis.dropna(subset=['sept_sweeps_(fatherly,_good,_digg,_digital_trends)'])

In [110]:
july = analysis.dropna(subset=['july_sweeps_(upout,_joyus,_dailysecret)'])

In [111]:
marco.describe()


Out[111]:
duration_subscribed mc_user_rating
count 2709 4425.000000
mean 117 days 12:53:25.315614 2.832316
std 216 days 00:15:28.892805 0.637436
min 0 days 00:00:00 1.000000
25% 9 days 00:00:00 3.000000
50% 39 days 00:00:00 3.000000
75% 110 days 00:00:00 3.000000
max 1547 days 00:00:00 5.000000

In [119]:
marco.dtypes


Out[119]:
UID                                                            object
duration_subscribed                                   timedelta64[ns]
marco                                                          object
umbrella_sweeps_group                                          object
sept_sweeps_(fatherly,_good,_digg,_digital_trends)             object
july_sweeps_(upout,_joyus,_dailysecret)                        object
mc_user_rating                                                float64
unsub_time                                             datetime64[ns]
signed_up_at                                           datetime64[ns]
dtype: object

In [122]:
marco[marco.signed_up_at < '2016-04-12']


Out[122]:
UID duration_subscribed marco umbrella_sweeps_group sept_sweeps_(fatherly,_good,_digg,_digital_trends) july_sweeps_(upout,_joyus,_dailysecret) mc_user_rating unsub_time signed_up_at
26311 sldeaguiar@gmail.com 680 days Marco NaN NaN NaN 3.0 2015-09-28 2013-11-17
26388 mpexton@seniorhousingoptions.org 88 days Marco NaN NaN NaN 2.0 2015-10-30 2015-08-03
26403 grace.k.fuscoe@vanderbilt.edu 69 days Marco NaN NaN NaN 3.0 2015-10-11 2015-08-03
26414 marlenewallace@bell.net 94 days Marco NaN NaN NaN 3.0 2016-04-22 2016-01-19
26430 mccanine@yahoo.com 849 days Marco NaN NaN NaN 1.0 2014-05-27 2012-01-29
26443 momcenter@gmail.com 26 days Marco NaN NaN NaN 3.0 2016-04-28 2016-04-02
26489 ro@mostbest.com 39 days Marco NaN NaN NaN 3.0 2015-11-04 2015-09-26
26501 rendered.sketchless@gmail.com 115 days Marco NaN NaN NaN 3.0 2015-10-01 2015-06-08
26527 katiekmarshall@msn.com 549 days Marco NaN NaN NaN 2.0 2015-11-27 2014-05-27
26598 harpaeinars@gmail.com 60 days Marco NaN NaN NaN 4.0 2015-10-02 2015-08-03
26599 j2tausig@gmail.com 77 days Marco NaN NaN NaN 3.0 2015-10-19 2015-08-03
26609 johnlisonbee1@gmail.com 7 days Marco NaN NaN NaN 3.0 2015-10-27 2015-10-20
26658 wcollinson@gmail.com 23 days Marco NaN NaN NaN 3.0 2016-04-25 2016-04-02
26704 gina_mecham@hotmail.com 702 days Marco NaN NaN NaN 1.0 2016-04-28 2014-05-27
26755 terrihinkshome@msn.com 18 days Marco NaN NaN NaN 2.0 2016-04-29 2016-04-11
26820 thiedke61@gmail.com 71 days Marco NaN NaN NaN 3.0 2015-10-22 2015-08-12
26909 painter5441@icloud.com 74 days Marco NaN NaN NaN 3.0 2015-10-16 2015-08-03
26912 sokanovic@msn.com 14 days Marco NaN NaN NaN 3.0 2015-10-31 2015-10-17
26920 jlmccoy76@aol.com 136 days Marco NaN NaN NaN 2.0 2016-04-25 2015-12-11
26948 kocornik@verizon.net 22 days Marco NaN NaN NaN 3.0 2015-10-16 2015-09-24
27006 nstei014@fiu.edu 37 days Marco NaN NaN NaN 3.0 2015-10-30 2015-09-23
27007 muffyj@telus.net 102 days Marco NaN NaN NaN 3.0 2015-11-13 2015-08-03
27038 klaraewings@gmail.com 1310 days Marco NaN NaN NaN 2.0 2015-07-21 2011-12-19
27041 laura.cleocaller@gmail.com 23 days Marco NaN NaN NaN 3.0 2016-04-25 2016-04-02
27090 timpalmer1@hotmail.co.uk 659 days Marco NaN NaN NaN 1.0 2015-10-30 2014-01-09
27108 red19wings02@yahoo.com 267 days Marco NaN NaN NaN 3.0 2016-04-26 2015-08-03
27119 incorrectly.anonymous@gmail.com 7 days Marco NaN NaN NaN 3.0 2016-01-26 2016-01-19
27132 csilevinac@yahoo.com 2 days Marco NaN NaN NaN 3.0 2015-10-20 2015-10-18
27136 michelesingsbass@shaw.ca 1136 days Marco NaN NaN NaN 2.0 2015-06-17 2012-05-07
27138 arthur_manigo02@yahoo.ca 500 days Marco NaN NaN NaN 4.0 2015-10-09 2014-05-27
... ... ... ... ... ... ... ... ... ...
78499 fosters522@gmail.com 536 days Marco NaN NaN NaN 4.0 2015-07-22 2014-02-01
78520 hltystad@comcast.net 99 days Marco NaN NaN NaN 2.0 2015-07-14 2015-04-06
78537 thenat93@gmail.com 142 days Marco NaN NaN NaN 3.0 2015-12-23 2015-08-03
78552 samola123@yahoo.com 382 days Marco NaN NaN NaN 3.0 2015-11-03 2014-10-17
78569 rolandguzman@gmail.com 146 days Marco NaN NaN NaN 3.0 2015-12-08 2015-07-15
78594 curtandmichelleolson@gmail.com 87 days Marco NaN NaN NaN 3.0 2015-04-15 2015-01-18
78624 islandbrenda@icloud.com 85 days Marco NaN NaN NaN 3.0 2015-06-17 2015-03-24
78641 hjlarson007@yahoo.com 8 days Marco NaN NaN NaN 3.0 2015-11-01 2015-10-24
78650 cab23025@yahoo.com 372 days Marco NaN NaN NaN 3.0 2016-01-05 2014-12-29
78680 hemmshoe@gmail.com 2 days Marco NaN NaN NaN 3.0 2015-12-10 2015-12-08
78720 rachellerobson@gmail.com 24 days Marco NaN NaN NaN 3.0 2014-12-09 2014-11-15
78728 levergirl2001@yahoo.ca 553 days Marco NaN NaN NaN 2.0 2015-12-01 2014-05-27
78731 corinnasings@shaw.ca 5 days Marco NaN NaN NaN 3.0 2015-01-30 2015-01-25
78758 kikinardiz@gmail.com 116 days Marco NaN NaN NaN 3.0 2015-11-27 2015-08-03
78772 hi9272000@hotmail.com 121 days Marco NaN NaN NaN 3.0 2015-04-15 2014-12-15
78838 rabbit@gmail.com 315 days Marco NaN NaN NaN 2.0 2016-01-25 2015-03-16
78878 nancdixon@gmail.com 1 days Marco NaN NaN NaN 3.0 2014-05-27 2014-05-26
79037 dantina777@yahoo.com 209 days Marco NaN NaN NaN 3.0 2015-04-17 2014-09-20
79068 jbar5227@yahoo.com 518 days Marco NaN NaN NaN 3.0 2015-08-21 2014-03-21
79255 jsquires@emmaus.edu 4 days Marco NaN NaN NaN 3.0 2015-12-15 2015-12-11
79705 mbg1211@yahoo.com 50 days Marco NaN NaN NaN 3.0 2015-03-24 2015-02-02
79747 elaineyung@ymail.com 409 days Marco NaN NaN NaN 3.0 2015-09-01 2014-07-19
79774 ajnorton@vt.edu 150 days Marco NaN NaN NaN 3.0 2015-12-31 2015-08-03
80161 mariosaraiva@hotmail.com 15 days Marco NaN NaN NaN 3.0 2015-12-23 2015-12-08
80441 kirsten.mclain@yahoo.com 90 days Marco NaN NaN NaN 3.0 2015-04-07 2015-01-07
81142 sandrahein@sbcglobal.net 20 days Marco NaN NaN NaN 3.0 2016-01-19 2015-12-30
81570 laurelkerr9@aol.com 151 days Marco NaN NaN NaN 2.0 2016-01-01 2015-08-03
81644 ramgoat@icloud.com 6 days Marco NaN NaN NaN 3.0 2016-01-11 2016-01-05
82204 jedemars@yahoo.com 106 days Marco NaN NaN NaN 3.0 2016-01-16 2015-10-02
82574 match6371@yahoo.com 0 days Marco NaN NaN NaN 1.0 2016-01-20 2016-01-20

2343 rows × 9 columns


In [126]:
len(df) - df.marco.isnull().sum()


Out[126]:
28882

In [127]:
sept.describe()


Out[127]:
duration_subscribed mc_user_rating
count 867 911.000000
mean 107 days 15:40:04.152249 2.814490
std 192 days 16:44:05.813042 0.757126
min 0 days 00:00:00 1.000000
25% 10 days 00:00:00 3.000000
50% 39 days 00:00:00 3.000000
75% 104 days 12:00:00 3.000000
max 1522 days 00:00:00 5.000000

In [128]:
umbrella.describe()


Out[128]:
duration_subscribed mc_user_rating
count 29629 37263.000000
mean 104 days 18:20:37.139289 2.889139
std 198 days 22:23:12.703882 0.658569
min -208 days +00:00:00 1.000000
25% 9 days 00:00:00 3.000000
50% 29 days 00:00:00 3.000000
75% 100 days 00:00:00 3.000000
max 1561 days 00:00:00 5.000000

In [129]:
july.describe()


Out[129]:
duration_subscribed mc_user_rating
count 11400 11680.000000
mean 100 days 20:19:19.578947 2.804281
std 196 days 03:07:38.194081 0.716076
min -86 days +00:00:00 1.000000
25% 8 days 00:00:00 3.000000
50% 26 days 00:00:00 3.000000
75% 94 days 00:00:00 3.000000
max 1590 days 00:00:00 5.000000

In [135]:



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-135-e08e39670f4b> in <module>()
----> 1 df[df.marco.isfinite()]['mc_user_rating']

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in __getattr__(self, name)
   2667             if name in self._info_axis:
   2668                 return self[name]
-> 2669             return object.__getattribute__(self, name)
   2670 
   2671     def __setattr__(self, name, value):

AttributeError: 'Series' object has no attribute 'isfinite'

In [136]:
marco.shape


Out[136]:
(4425, 9)

In [139]:
df[(df.mc_user_rating >= 3) | (df.signed_up_at > '2016-05-01')].shape


Out[139]:
(220001, 96)

In [143]:
df[(df.mc_user_rating == 2) & (df.signed_up_at < '2016-04-01')].shape


Out[143]:
(166984, 96)

In [ ]: