In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
import datetime
%matplotlib inline
In [4]:
df = pd.read_csv('~/Downloads/boomtrainsubs.csv',parse_dates=True)
In [5]:
df.dtypes
Out[5]:
UID object
nudgespot::emails object
nudgespot::phones float64
app object
bsin object
cc object
cc.1 float64
clean_campaign_id object
clean_campaign_title object
clean_time object
cleaned object
confirm_ip object
confirm_ip.1 float64
confirm_time object
created_at object
custom_field_1 object
custom_field_2 object
daily_newsletter object
datavalidationgrade object
dstoff float64
dstoff.1 float64
email_type object
euid object
first_name object
gender object
gmtoff float64
gmtoff.1 float64
grade_b object
href object
ip_warmup_1 object
...
ns_operating_system object
ns_region object
ns_timezone object
obscura_day_attendees_-_all_years object
optin_ip object
optin_ip.1 float64
optin_time object
optin_time.1 float64
region object
region.1 float64
regional_event_announcement_lists object
remaining_2015 object
seed_list object
sept_sweeps_(fatherly,_good,_digg,_digital_trends) object
session object
signed_up_at object
source object
timezone object
timezone.1 float64
type object
umbrella_sweeps_group object
unsub_campaign_id object
unsub_campaign_title object
unsub_reason object
unsub_reason_other object
unsub_time object
unsubscribed object
userid object
utm_medium object
utm_source float64
dtype: object
In [6]:
df.head(10)
Out[6]:
UID
nudgespot::emails
nudgespot::phones
app
bsin
cc
cc.1
clean_campaign_id
clean_campaign_title
clean_time
...
umbrella_sweeps_group
unsub_campaign_id
unsub_campaign_title
unsub_reason
unsub_reason_other
unsub_time
unsubscribed
userid
utm_medium
utm_source
0
david@boomtrain.com
david@boomtrain.com
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
afcced40e8
Winner
NaN
NaN
2016-05-20 10:58:41
True
NaN
NaN
NaN
1
mike@atlasobscura.com
mike@atlasobscura.com
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
False
NaN
NaN
NaN
2
daniel.sobo@gmail.com
daniel.sobo@gmail.com
NaN
NaN
NaN
US
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
False
NaN
NaN
NaN
3
vanityplate@gmail.com
vanityplate@gmail.com
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
False
NaN
NaN
NaN
4
banyasegol@gmail.com
banyasegol@gmail.com
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
False
NaN
NaN
NaN
5
sillyboop10@gmail.com
sillyboop10@gmail.com
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
False
NaN
NaN
NaN
6
brett.iredell@gmail.com
brett.iredell@gmail.com
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
False
NaN
NaN
NaN
7
max.silvers@yahoo.com
max.silvers@yahoo.com
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
False
NaN
NaN
NaN
8
rachel.doyle@atlasobscura.com
rachel.doyle@atlasobscura.com
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
False
NaN
NaN
NaN
9
anika.j.burgess@gmail.com
anika.j.burgess@gmail.com
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
False
NaN
NaN
NaN
10 rows × 96 columns
In [7]:
df.ix(0,'unsub_time')
Out[7]:
<pandas.core.indexing._IXIndexer at 0x137e68390>
In [13]:
df.ix[0,'unsub_time']
Out[13]:
'2016-05-20 10:58:41'
In [39]:
def split_date(x):
return x.split(' ')[0]
In [40]:
df.unsub_time = df.unsub_time.apply(split_date)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-40-6e87c3a20f25> in <module>()
----> 1 df.unsub_time = df.unsub_time.apply(split_date)
/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in apply(self, func, convert_dtype, args, **kwds)
2235 values = lib.map_infer(values, boxer)
2236
-> 2237 mapped = lib.map_infer(values, f, convert=convert_dtype)
2238 if len(mapped) and isinstance(mapped[0], Series):
2239 from pandas.core.frame import DataFrame
pandas/src/inference.pyx in pandas.lib.map_infer (pandas/lib.c:63043)()
<ipython-input-39-0574d5c0e6e1> in split_date(x)
1 def split_date(x):
----> 2 return x.split(' ')[0]
AttributeError: 'float' object has no attribute 'split'
In [30]:
In [32]:
Out[32]:
UID
nudgespot::emails
nudgespot::phones
app
bsin
cc
cc.1
clean_campaign_id
clean_campaign_title
clean_time
...
umbrella_sweeps_group
unsub_campaign_id
unsub_campaign_title
unsub_reason
unsub_reason_other
unsub_time
unsubscribed
userid
utm_medium
utm_source
0
david@boomtrain.com
david@boomtrain.com
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
afcced40e8
Winner
NaN
NaN
2016-05-20 10:58:41
True
NaN
NaN
NaN
1 rows × 96 columns
In [41]:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-41-1161e8abe49d> in <module>()
----> 1 unsub_df.unsub_time = unsub_df.unsub_time.apply(split_date)
/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in apply(self, func, convert_dtype, args, **kwds)
2235 values = lib.map_infer(values, boxer)
2236
-> 2237 mapped = lib.map_infer(values, f, convert=convert_dtype)
2238 if len(mapped) and isinstance(mapped[0], Series):
2239 from pandas.core.frame import DataFrame
pandas/src/inference.pyx in pandas.lib.map_infer (pandas/lib.c:63043)()
<ipython-input-39-0574d5c0e6e1> in split_date(x)
1 def split_date(x):
----> 2 return x.split(' ')[0]
AttributeError: 'float' object has no attribute 'split'
In [38]:
unsub_df.ix[0,'unsub_time'].split(' ')[0]
Out[38]:
'2016-05-20'
In [75]:
unsub_df = df.dropna(subset=['unsub_time'])
In [76]:
unsub_df.unsub_time = unsub_df.unsub_time.apply(split_date)
In [77]:
unsub_df.columns
Out[77]:
Index([u'UID', u'nudgespot::emails', u'nudgespot::phones', u'app', u'bsin',
u'cc', u'cc.1', u'clean_campaign_id', u'clean_campaign_title',
u'clean_time', u'cleaned', u'confirm_ip', u'confirm_ip.1',
u'confirm_time', u'created_at', u'custom_field_1', u'custom_field_2',
u'daily_newsletter', u'datavalidationgrade', u'dstoff', u'dstoff.1',
u'email_type', u'euid', u'first_name', u'gender', u'gmtoff',
u'gmtoff.1', u'grade_b', u'href', u'ip_warmup_1', u'ip_warmup_2',
u'ip_warmup_3', u'ip_warmup_4', u'ip_warmup_5', u'ip_warmup_6',
u'july_sweeps_(upout,_joyus,_dailysecret)', u'last_changed',
u'last_changed.1', u'last_clicked', u'last_contact', u'last_heard_from',
u'last_name', u'last_opened', u'last_seen', u'last_updated',
u'latitude', u'latitude.1', u'leid', u'longitude', u'longitude.1',
u'marco', u'mc_user_rating', u'member_rating', u'member_rating.1',
u'membership_type', u'name', u'newsletters', u'notes', u'ns_browser',
u'ns_browser_version', u'ns_city', u'ns_country', u'ns_device_type',
u'ns_ip_address', u'ns_latitude', u'ns_longitude',
u'ns_operating_system', u'ns_region', u'ns_timezone',
u'obscura_day_attendees_-_all_years', u'optin_ip', u'optin_ip.1',
u'optin_time', u'optin_time.1', u'region', u'region.1',
u'regional_event_announcement_lists', u'remaining_2015', u'seed_list',
u'sept_sweeps_(fatherly,_good,_digg,_digital_trends)', u'session',
u'signed_up_at', u'source', u'timezone', u'timezone.1', u'type',
u'umbrella_sweeps_group', u'unsub_campaign_id', u'unsub_campaign_title',
u'unsub_reason', u'unsub_reason_other', u'unsub_time', u'unsubscribed',
u'userid', u'utm_medium', u'utm_source'],
dtype='object')
In [78]:
unsub_df[['confirm_time','optin_time','optin_time.1','signed_up_at']]
Out[78]:
confirm_time
optin_time
optin_time.1
signed_up_at
0
NaN
NaN
NaN
2016-05-12T11:53:50Z
15
NaN
NaN
NaN
2014-05-27T17:36:17Z
30
NaN
NaN
NaN
2015-07-13T15:06:17Z
34
NaN
NaN
NaN
2014-05-27T17:23:38Z
35
NaN
NaN
NaN
2014-05-27T17:23:35Z
84
NaN
NaN
NaN
2014-05-27T17:33:06Z
91
NaN
NaN
NaN
2014-05-27T17:34:31Z
106
NaN
NaN
NaN
2015-09-14T20:09:53Z
116
3/21/12 13:39
NaN
NaN
2012-10-02T22:09:22Z
148
NaN
NaN
NaN
2015-09-19T04:59:55Z
186
NaN
NaN
NaN
2016-04-22T05:04:41Z
222
NaN
NaN
NaN
2015-11-29T16:55:10Z
227
NaN
NaN
NaN
2016-02-17T08:06:43Z
231
NaN
NaN
NaN
2015-09-09T17:43:39Z
235
NaN
NaN
NaN
2015-11-25T18:11:42Z
247
NaN
NaN
NaN
2013-11-14T20:01:01Z
254
NaN
NaN
NaN
2016-04-22T09:39:21Z
277
NaN
NaN
NaN
2015-01-12T21:54:29Z
295
NaN
NaN
NaN
2013-11-14T20:03:36Z
345
NaN
NaN
NaN
2016-04-22T13:18:46Z
350
NaN
NaN
NaN
2014-05-27T17:34:43Z
366
NaN
NaN
NaN
2016-02-22T23:46:48Z
376
NaN
NaN
NaN
2015-09-21T11:50:47Z
387
NaN
NaN
NaN
2015-09-30T21:58:23Z
420
NaN
NaN
NaN
2013-11-14T20:02:15Z
433
NaN
NaN
NaN
2015-03-24T18:05:26Z
438
NaN
NaN
NaN
2016-04-17T15:34:29Z
444
NaN
NaN
NaN
2016-04-22T17:36:22Z
495
NaN
NaN
NaN
2014-05-21T20:46:54Z
500
NaN
NaN
NaN
2016-04-22T21:14:02Z
...
...
...
...
...
457193
NaN
NaN
NaN
2016-05-16T14:39:23Z
457194
NaN
NaN
NaN
2016-05-16T14:37:55Z
457195
NaN
NaN
NaN
2016-05-16T14:39:10Z
457196
NaN
NaN
NaN
2016-05-16T14:38:14Z
457197
NaN
NaN
NaN
2016-05-16T14:38:31Z
457198
NaN
NaN
NaN
2016-05-16T14:38:25Z
457199
NaN
NaN
NaN
2016-05-16T14:38:56Z
457200
NaN
NaN
NaN
2016-05-16T14:38:53Z
457201
NaN
NaN
NaN
2016-05-11T12:37:11Z
457202
NaN
NaN
NaN
2016-05-16T14:38:04Z
457203
NaN
NaN
NaN
2016-05-11T12:36:43Z
457204
NaN
NaN
NaN
2016-05-16T14:38:20Z
457205
NaN
NaN
NaN
2016-05-16T14:39:35Z
457206
NaN
NaN
NaN
2016-05-16T14:39:27Z
457207
NaN
NaN
NaN
2016-05-16T14:37:55Z
457208
NaN
NaN
NaN
2016-05-16T14:38:37Z
457209
NaN
NaN
NaN
2016-05-11T12:36:27Z
457210
NaN
NaN
NaN
2016-05-16T14:38:23Z
457211
NaN
NaN
NaN
2016-05-16T14:38:33Z
457212
NaN
NaN
NaN
2016-05-16T14:38:18Z
457213
NaN
NaN
NaN
2016-05-16T14:39:00Z
457214
NaN
NaN
NaN
2016-05-11T12:34:57Z
457215
NaN
NaN
NaN
2016-05-16T14:39:09Z
457216
NaN
NaN
NaN
2016-05-16T14:38:16Z
457217
NaN
NaN
NaN
2016-05-16T14:38:26Z
457218
NaN
NaN
NaN
2016-05-16T14:38:12Z
457219
NaN
NaN
NaN
2016-05-16T14:37:53Z
457220
NaN
NaN
NaN
2016-05-11T12:35:26Z
457221
NaN
NaN
NaN
2016-05-16T14:39:31Z
457228
NaN
NaN
NaN
2016-05-16T14:39:12Z
96809 rows × 4 columns
In [79]:
unsub_df.signed_up_at.isnull().sum()
Out[79]:
18
In [80]:
unsub_df = unsub_df.dropna(subset=['signed_up_at'])
In [81]:
unsub_df.signed_up_at = unsub_df.signed_up_at.apply(lambda x: x.split('T')[0])
In [82]:
unsub_df.signed_up_at = pd.Series([pd.to_datetime(date) for date in unsub_df.signed_up_at])
In [83]:
unsub_df.unsub_time = pd.Series([pd.to_datetime(date) for date in unsub_df.unsub_time])
In [84]:
unsub_df['duration_subscribed'] = unsub_df.unsub_time - unsub_df.signed_up_at
In [85]:
unsub_df['duration_subscribed'].describe()
Out[85]:
count 84779
mean 117 days 20:24:57.936989
std 206 days 06:01:30.738034
min -267 days +00:00:00
25% 9 days 00:00:00
50% 36 days 00:00:00
75% 127 days 00:00:00
max 1609 days 00:00:00
Name: duration_subscribed, dtype: object
In [87]:
unsub_df.columns
Out[87]:
Index([u'UID', u'nudgespot::emails', u'nudgespot::phones', u'app', u'bsin',
u'cc', u'cc.1', u'clean_campaign_id', u'clean_campaign_title',
u'clean_time', u'cleaned', u'confirm_ip', u'confirm_ip.1',
u'confirm_time', u'created_at', u'custom_field_1', u'custom_field_2',
u'daily_newsletter', u'datavalidationgrade', u'dstoff', u'dstoff.1',
u'email_type', u'euid', u'first_name', u'gender', u'gmtoff',
u'gmtoff.1', u'grade_b', u'href', u'ip_warmup_1', u'ip_warmup_2',
u'ip_warmup_3', u'ip_warmup_4', u'ip_warmup_5', u'ip_warmup_6',
u'july_sweeps_(upout,_joyus,_dailysecret)', u'last_changed',
u'last_changed.1', u'last_clicked', u'last_contact', u'last_heard_from',
u'last_name', u'last_opened', u'last_seen', u'last_updated',
u'latitude', u'latitude.1', u'leid', u'longitude', u'longitude.1',
u'marco', u'mc_user_rating', u'member_rating', u'member_rating.1',
u'membership_type', u'name', u'newsletters', u'notes', u'ns_browser',
u'ns_browser_version', u'ns_city', u'ns_country', u'ns_device_type',
u'ns_ip_address', u'ns_latitude', u'ns_longitude',
u'ns_operating_system', u'ns_region', u'ns_timezone',
u'obscura_day_attendees_-_all_years', u'optin_ip', u'optin_ip.1',
u'optin_time', u'optin_time.1', u'region', u'region.1',
u'regional_event_announcement_lists', u'remaining_2015', u'seed_list',
u'sept_sweeps_(fatherly,_good,_digg,_digital_trends)', u'session',
u'signed_up_at', u'source', u'timezone', u'timezone.1', u'type',
u'umbrella_sweeps_group', u'unsub_campaign_id', u'unsub_campaign_title',
u'unsub_reason', u'unsub_reason_other', u'unsub_time', u'unsubscribed',
u'userid', u'utm_medium', u'utm_source', u'duration_subscribed'],
dtype='object')
In [105]:
analysis = unsub_df[['UID','duration_subscribed','marco','umbrella_sweeps_group',
'sept_sweeps_(fatherly,_good,_digg,_digital_trends)','july_sweeps_(upout,_joyus,_dailysecret)',
'mc_user_rating','unsub_time','signed_up_at']]
In [106]:
analysis.head(1)
Out[106]:
UID
duration_subscribed
marco
umbrella_sweeps_group
sept_sweeps_(fatherly,_good,_digg,_digital_trends)
july_sweeps_(upout,_joyus,_dailysecret)
mc_user_rating
unsub_time
signed_up_at
0
david@boomtrain.com
8 days
NaN
NaN
NaN
NaN
3.0
2016-05-20
2016-05-12
In [118]:
marco = analysis.dropna(subset=['marco'])
In [108]:
umbrella = analysis.dropna(subset=['umbrella_sweeps_group'])
In [109]:
sept = analysis.dropna(subset=['sept_sweeps_(fatherly,_good,_digg,_digital_trends)'])
In [110]:
july = analysis.dropna(subset=['july_sweeps_(upout,_joyus,_dailysecret)'])
In [111]:
marco.describe()
Out[111]:
duration_subscribed
mc_user_rating
count
2709
4425.000000
mean
117 days 12:53:25.315614
2.832316
std
216 days 00:15:28.892805
0.637436
min
0 days 00:00:00
1.000000
25%
9 days 00:00:00
3.000000
50%
39 days 00:00:00
3.000000
75%
110 days 00:00:00
3.000000
max
1547 days 00:00:00
5.000000
In [119]:
marco.dtypes
Out[119]:
UID object
duration_subscribed timedelta64[ns]
marco object
umbrella_sweeps_group object
sept_sweeps_(fatherly,_good,_digg,_digital_trends) object
july_sweeps_(upout,_joyus,_dailysecret) object
mc_user_rating float64
unsub_time datetime64[ns]
signed_up_at datetime64[ns]
dtype: object
In [122]:
marco[marco.signed_up_at < '2016-04-12']
Out[122]:
UID
duration_subscribed
marco
umbrella_sweeps_group
sept_sweeps_(fatherly,_good,_digg,_digital_trends)
july_sweeps_(upout,_joyus,_dailysecret)
mc_user_rating
unsub_time
signed_up_at
26311
sldeaguiar@gmail.com
680 days
Marco
NaN
NaN
NaN
3.0
2015-09-28
2013-11-17
26388
mpexton@seniorhousingoptions.org
88 days
Marco
NaN
NaN
NaN
2.0
2015-10-30
2015-08-03
26403
grace.k.fuscoe@vanderbilt.edu
69 days
Marco
NaN
NaN
NaN
3.0
2015-10-11
2015-08-03
26414
marlenewallace@bell.net
94 days
Marco
NaN
NaN
NaN
3.0
2016-04-22
2016-01-19
26430
mccanine@yahoo.com
849 days
Marco
NaN
NaN
NaN
1.0
2014-05-27
2012-01-29
26443
momcenter@gmail.com
26 days
Marco
NaN
NaN
NaN
3.0
2016-04-28
2016-04-02
26489
ro@mostbest.com
39 days
Marco
NaN
NaN
NaN
3.0
2015-11-04
2015-09-26
26501
rendered.sketchless@gmail.com
115 days
Marco
NaN
NaN
NaN
3.0
2015-10-01
2015-06-08
26527
katiekmarshall@msn.com
549 days
Marco
NaN
NaN
NaN
2.0
2015-11-27
2014-05-27
26598
harpaeinars@gmail.com
60 days
Marco
NaN
NaN
NaN
4.0
2015-10-02
2015-08-03
26599
j2tausig@gmail.com
77 days
Marco
NaN
NaN
NaN
3.0
2015-10-19
2015-08-03
26609
johnlisonbee1@gmail.com
7 days
Marco
NaN
NaN
NaN
3.0
2015-10-27
2015-10-20
26658
wcollinson@gmail.com
23 days
Marco
NaN
NaN
NaN
3.0
2016-04-25
2016-04-02
26704
gina_mecham@hotmail.com
702 days
Marco
NaN
NaN
NaN
1.0
2016-04-28
2014-05-27
26755
terrihinkshome@msn.com
18 days
Marco
NaN
NaN
NaN
2.0
2016-04-29
2016-04-11
26820
thiedke61@gmail.com
71 days
Marco
NaN
NaN
NaN
3.0
2015-10-22
2015-08-12
26909
painter5441@icloud.com
74 days
Marco
NaN
NaN
NaN
3.0
2015-10-16
2015-08-03
26912
sokanovic@msn.com
14 days
Marco
NaN
NaN
NaN
3.0
2015-10-31
2015-10-17
26920
jlmccoy76@aol.com
136 days
Marco
NaN
NaN
NaN
2.0
2016-04-25
2015-12-11
26948
kocornik@verizon.net
22 days
Marco
NaN
NaN
NaN
3.0
2015-10-16
2015-09-24
27006
nstei014@fiu.edu
37 days
Marco
NaN
NaN
NaN
3.0
2015-10-30
2015-09-23
27007
muffyj@telus.net
102 days
Marco
NaN
NaN
NaN
3.0
2015-11-13
2015-08-03
27038
klaraewings@gmail.com
1310 days
Marco
NaN
NaN
NaN
2.0
2015-07-21
2011-12-19
27041
laura.cleocaller@gmail.com
23 days
Marco
NaN
NaN
NaN
3.0
2016-04-25
2016-04-02
27090
timpalmer1@hotmail.co.uk
659 days
Marco
NaN
NaN
NaN
1.0
2015-10-30
2014-01-09
27108
red19wings02@yahoo.com
267 days
Marco
NaN
NaN
NaN
3.0
2016-04-26
2015-08-03
27119
incorrectly.anonymous@gmail.com
7 days
Marco
NaN
NaN
NaN
3.0
2016-01-26
2016-01-19
27132
csilevinac@yahoo.com
2 days
Marco
NaN
NaN
NaN
3.0
2015-10-20
2015-10-18
27136
michelesingsbass@shaw.ca
1136 days
Marco
NaN
NaN
NaN
2.0
2015-06-17
2012-05-07
27138
arthur_manigo02@yahoo.ca
500 days
Marco
NaN
NaN
NaN
4.0
2015-10-09
2014-05-27
...
...
...
...
...
...
...
...
...
...
78499
fosters522@gmail.com
536 days
Marco
NaN
NaN
NaN
4.0
2015-07-22
2014-02-01
78520
hltystad@comcast.net
99 days
Marco
NaN
NaN
NaN
2.0
2015-07-14
2015-04-06
78537
thenat93@gmail.com
142 days
Marco
NaN
NaN
NaN
3.0
2015-12-23
2015-08-03
78552
samola123@yahoo.com
382 days
Marco
NaN
NaN
NaN
3.0
2015-11-03
2014-10-17
78569
rolandguzman@gmail.com
146 days
Marco
NaN
NaN
NaN
3.0
2015-12-08
2015-07-15
78594
curtandmichelleolson@gmail.com
87 days
Marco
NaN
NaN
NaN
3.0
2015-04-15
2015-01-18
78624
islandbrenda@icloud.com
85 days
Marco
NaN
NaN
NaN
3.0
2015-06-17
2015-03-24
78641
hjlarson007@yahoo.com
8 days
Marco
NaN
NaN
NaN
3.0
2015-11-01
2015-10-24
78650
cab23025@yahoo.com
372 days
Marco
NaN
NaN
NaN
3.0
2016-01-05
2014-12-29
78680
hemmshoe@gmail.com
2 days
Marco
NaN
NaN
NaN
3.0
2015-12-10
2015-12-08
78720
rachellerobson@gmail.com
24 days
Marco
NaN
NaN
NaN
3.0
2014-12-09
2014-11-15
78728
levergirl2001@yahoo.ca
553 days
Marco
NaN
NaN
NaN
2.0
2015-12-01
2014-05-27
78731
corinnasings@shaw.ca
5 days
Marco
NaN
NaN
NaN
3.0
2015-01-30
2015-01-25
78758
kikinardiz@gmail.com
116 days
Marco
NaN
NaN
NaN
3.0
2015-11-27
2015-08-03
78772
hi9272000@hotmail.com
121 days
Marco
NaN
NaN
NaN
3.0
2015-04-15
2014-12-15
78838
rabbit@gmail.com
315 days
Marco
NaN
NaN
NaN
2.0
2016-01-25
2015-03-16
78878
nancdixon@gmail.com
1 days
Marco
NaN
NaN
NaN
3.0
2014-05-27
2014-05-26
79037
dantina777@yahoo.com
209 days
Marco
NaN
NaN
NaN
3.0
2015-04-17
2014-09-20
79068
jbar5227@yahoo.com
518 days
Marco
NaN
NaN
NaN
3.0
2015-08-21
2014-03-21
79255
jsquires@emmaus.edu
4 days
Marco
NaN
NaN
NaN
3.0
2015-12-15
2015-12-11
79705
mbg1211@yahoo.com
50 days
Marco
NaN
NaN
NaN
3.0
2015-03-24
2015-02-02
79747
elaineyung@ymail.com
409 days
Marco
NaN
NaN
NaN
3.0
2015-09-01
2014-07-19
79774
ajnorton@vt.edu
150 days
Marco
NaN
NaN
NaN
3.0
2015-12-31
2015-08-03
80161
mariosaraiva@hotmail.com
15 days
Marco
NaN
NaN
NaN
3.0
2015-12-23
2015-12-08
80441
kirsten.mclain@yahoo.com
90 days
Marco
NaN
NaN
NaN
3.0
2015-04-07
2015-01-07
81142
sandrahein@sbcglobal.net
20 days
Marco
NaN
NaN
NaN
3.0
2016-01-19
2015-12-30
81570
laurelkerr9@aol.com
151 days
Marco
NaN
NaN
NaN
2.0
2016-01-01
2015-08-03
81644
ramgoat@icloud.com
6 days
Marco
NaN
NaN
NaN
3.0
2016-01-11
2016-01-05
82204
jedemars@yahoo.com
106 days
Marco
NaN
NaN
NaN
3.0
2016-01-16
2015-10-02
82574
match6371@yahoo.com
0 days
Marco
NaN
NaN
NaN
1.0
2016-01-20
2016-01-20
2343 rows × 9 columns
In [126]:
len(df) - df.marco.isnull().sum()
Out[126]:
28882
In [127]:
sept.describe()
Out[127]:
duration_subscribed
mc_user_rating
count
867
911.000000
mean
107 days 15:40:04.152249
2.814490
std
192 days 16:44:05.813042
0.757126
min
0 days 00:00:00
1.000000
25%
10 days 00:00:00
3.000000
50%
39 days 00:00:00
3.000000
75%
104 days 12:00:00
3.000000
max
1522 days 00:00:00
5.000000
In [128]:
umbrella.describe()
Out[128]:
duration_subscribed
mc_user_rating
count
29629
37263.000000
mean
104 days 18:20:37.139289
2.889139
std
198 days 22:23:12.703882
0.658569
min
-208 days +00:00:00
1.000000
25%
9 days 00:00:00
3.000000
50%
29 days 00:00:00
3.000000
75%
100 days 00:00:00
3.000000
max
1561 days 00:00:00
5.000000
In [129]:
july.describe()
Out[129]:
duration_subscribed
mc_user_rating
count
11400
11680.000000
mean
100 days 20:19:19.578947
2.804281
std
196 days 03:07:38.194081
0.716076
min
-86 days +00:00:00
1.000000
25%
8 days 00:00:00
3.000000
50%
26 days 00:00:00
3.000000
75%
94 days 00:00:00
3.000000
max
1590 days 00:00:00
5.000000
In [135]:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-135-e08e39670f4b> in <module>()
----> 1 df[df.marco.isfinite()]['mc_user_rating']
/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in __getattr__(self, name)
2667 if name in self._info_axis:
2668 return self[name]
-> 2669 return object.__getattribute__(self, name)
2670
2671 def __setattr__(self, name, value):
AttributeError: 'Series' object has no attribute 'isfinite'
In [136]:
marco.shape
Out[136]:
(4425, 9)
In [139]:
df[(df.mc_user_rating >= 3) | (df.signed_up_at > '2016-05-01')].shape
Out[139]:
(220001, 96)
In [143]:
df[(df.mc_user_rating == 2) & (df.signed_up_at < '2016-04-01')].shape
Out[143]:
(166984, 96)
In [ ]:
Content source: facemelters/data-science
Similar notebooks: