In [1]:
%matplotlib inline
import numpy
import pandas
import matplotlib.pyplot as plt
from ggplot import *

plt.rcParams['figure.figsize'] = (16.0, 8.0)


/Users/cruz/dev/udacity/venv/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

In [2]:
df_raw = pandas.read_csv('data/mobile_app_user_dataset.csv', delimiter=';')
df_raw


/Users/cruz/dev/udacity/venv/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2902: DtypeWarning: Columns (0,3,4,10,12,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Out[2]:
ID StartDate EndDate Response Status Participant Type Q1_1_TEXT Q1_2_TEXT Q1_3_TEXT Q1_4_TEXT Q1_5_TEXT ... Q30_1 Q30_2 Q30_3 Q30_4 Q30_5 Q30_6 Q30_7 Q30_8 Q30_9 Q30_10
0 Unique ID for each participant StartDate EndDate 0=incomplete response, 1=complete response, 2=... 1=ours, 2=panel Browser Meta Info-Browser Browser Meta Info-Version Browser Meta Info-Operating System Browser Meta Info-Screen Resolution Browser Meta Info-Flash Version ... Your personality may influence the types of ap... Your personality may influence the types of ap... Your personality may influence the types of ap... Your personality may influence the types of ap... Your personality may influence the types of ap... Your personality may influence the types of ap... Your personality may influence the types of ap... Your personality may influence the types of ap... Your personality may influence the types of ap... Your personality may influence the types of ap...
1 1 26/09/12 07:46 26/09/12 07:46 2 1 Chrome 21.0.1180.89 WOW64 1280x800 11.3.31 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 2 26/09/12 07:45 26/09/12 07:56 1 1 Safari iPhone 6 CPU iPhone OS 6_0 like Mac OS X 320x480 -1 ... 6 3 7 2 6 3 4 3 4 4
3 3 26/09/12 07:45 26/09/12 08:01 1 1 Safari 6 CPU OS 6_0 like Mac OS X 768x1024 -1 ... 4 4 5 2 3 3 5 3 5 3
4 4 26/09/12 16:58 26/09/12 17:05 1 1 Firefox 15.0.1 Intel Mac OS X 10.6 1920x1200 11.4.402 ... 4 3 6 3 5 5 5 2 5 3
5 5 27/09/12 04:16 27/09/12 04:24 1 1 Chrome 22.0.1229.79 Intel Mac OS X 10_7_4 1280x800 11.4.402 ... 2 6 4 3 6 5 7 3 5 3
6 6 27/09/12 08:50 27/09/12 08:56 1 1 Chrome 21.0.1180.89 WOW64 1920x1080 11.3.31 ... 3 2 6 2 6 4 3 2 5 2
7 7 28/09/12 07:34 28/09/12 07:52 1 1 Chrome 22.0.1229.79 Macintosh 1280x800 11.4.402 ... 4 3 4 3 5 4 5 6 5 4
8 8 28/09/12 10:07 28/09/12 10:20 1 1 Firefox 15.0.1 Windows NT 6.1 1366x768 11.4.402 ... 4 4 4 1 5 6 4 2 5 6
9 9 28/09/12 11:32 28/09/12 11:40 1 1 Firefox 15.0.1 Windows NT 6.1 1600x900 11.4.402 ... 4 3 4 6 6 5 4 5 4 6
10 10 28/09/12 12:22 28/09/12 12:46 3 1 Chrome 22.0.1229.91 Windows NT 6.1 1366x768 11.3.31 ... 5 5 4 5 5 5 4 5 4 4
11 11 28/09/12 13:23 28/09/12 13:35 1 1 Safari 4 Android 2.3.5 800x1184 11.1.111 ... 5 3 5 4 4 6 7 7 6 6
12 12 28/09/12 13:28 28/09/12 13:39 3 1 Safari iPhone 5,1 iPod 320x480 -1 ... 3 6 2 3 4 3 4 3 3 3
13 13 28/09/12 11:00 02/10/12 06:45 1 1 Chrome 22.0.1229.79 Windows NT 5.1 1920x1200 11.3.31 ... 6 3 5 4 5 3 5 5 5 3
14 14 02/10/12 10:27 02/10/12 10:27 2 1 Chrome 22.0.1229.79 Windows NT 5.1 1280x768 11.3.31 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
15 15 02/10/12 16:34 02/10/12 16:34 2 1 Chrome 22.0.1229.79 Windows NT 6.1 1600x900 11.3.31 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
16 16 03/10/12 11:10 03/10/12 11:21 1 1 Firefox 3.5.16) Linux x86_64 1080x1920 11.2.202 ... 5 5 3 7 4 6 6 5 3 4
17 17 03/10/12 14:38 03/10/12 14:43 1 1 Chrome 22.0.1229.79 Windows NT 6.1 1920x1080 11.3.31 ... 4 3 2 3 7 4 5 7 3 4
18 18 03/10/12 15:11 03/10/12 15:26 1 1 Firefox 15.0.1 Windows NT 6.1 1600x900 11.4.402 ... 4 4 5 3 5 6 5 3 5 5
19 19 03/10/12 15:45 03/10/12 15:51 1 1 Chrome 22.0.1229.79 Windows NT 6.1 1920x1080 11.3.31 ... 4 6 2 7 7 3 4 7 1 4
20 20 03/10/12 15:50 03/10/12 16:00 1 1 Chrome 22.0.1229.79 Windows NT 6.1 1366x768 11.3.31 ... 5 4 6 5 4 4 7 1 3 3
21 21 03/10/12 16:02 03/10/12 16:09 1 1 Chrome 21.0.1180.90 Macintosh 1920x1200 10.3.183 ... 4 3 6 5 5 6 6 5 4 4
22 22 03/10/12 15:58 03/10/12 16:13 1 1 Firefox 15.0.1 Linux x86_64 1920x1080 11.2.202 ... 1 4 5 5 6 6 3 2 5 3
23 23 03/10/12 15:49 03/10/12 16:27 1 1 Firefox 15.0.1 Ubuntu 1600x900 10.1.999.Gnash ... 3 7 6 1 7 5 6 1 6 1
24 24 03/10/12 16:30 03/10/12 16:30 2 1 Chrome 22.0.1229.79 Windows NT 6.1 1280x1024 11.3.31 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
25 25 03/10/12 16:30 03/10/12 16:35 1 1 Safari 6.0.1 Macintosh 1920x1200 11.4.402 ... 3 5 5 2 6 6 5 2 5 3
26 26 03/10/12 16:30 03/10/12 16:37 1 1 Firefox 10.0.7 Linux x86_64 1680x1050 11.0.1 ... 1 1 1 1 1 1 1 1 1 1
27 27 03/10/12 16:28 03/10/12 16:37 1 1 Firefox 15.0.1 Windows NT 6.1 1366x768 11.4.402 ... 5 5 5 3 6 5 6 5 4 4
28 28 03/10/12 16:30 03/10/12 16:37 1 1 MSIE 9 Windows NT 6.1 1920x1200 11.4.402.278 ... 5 5 6 1 6 6 5 1 4 1
29 29 03/10/12 16:33 03/10/12 16:37 3 1 Firefox 10.0.7 Linux x86_64 2560x1600 11.2.202 ... 2 2 4 3 4 5 4 5 6 4
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
10179 10179 13/11/12 01:55 13/11/12 01:57 0 2 MSIE 8 Windows NT 6.1 1366x768 11.5.502.110 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10180 10180 17/11/12 04:59 17/11/12 05:00 0 2 MSIE 9 Windows NT 6.1 1536x864 11.5.502.110 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10181 10181 12/11/12 22:53 12/11/12 22:55 0 2 MSIE 9 Windows NT 6.0 1280x1024 10.1.53.64 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10182 10182 20/11/12 15:58 20/11/12 15:59 0 2 MSIE 8 Windows NT 5.1 1024x768 11.4.402.287 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10183 10183 21/11/12 02:10 21/11/12 02:13 0 2 MSIE 8 Windows NT 5.1 1280x800 11.5.502.110 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10184 10184 13/11/12 00:08 13/11/12 00:09 0 2 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10185 10185 16/11/12 12:19 16/11/12 12:20 0 2 MSIE 8 Windows NT 5.1 1170x936 10.1.53.64 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10186 10186 16/11/12 23:40 16/11/12 23:41 0 2 MSIE 9 Windows NT 6.1 1280x1024 -1 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10187 10187 16/11/12 13:55 16/11/12 13:58 0 2 MSIE 9 Windows NT 6.1 1093x614 11.1.102.55 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10188 10188 16/11/12 13:26 16/11/12 13:27 0 2 MSIE 9 Windows NT 6.0 1680x1050 11.4.402.287 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10189 10189 18/11/12 01:27 18/11/12 01:33 0 2 MSIE 9 Windows NT 6.1 1280x1024 10.3.181.34 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10190 10190 19/11/12 04:16 19/11/12 04:16 0 2 MSIE 9 Windows NT 6.1 1311x737 10.2.153.1 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10191 10191 17/11/12 10:37 17/11/12 10:40 0 2 MSIE 8 Windows NT 5.1 853x683 -1 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10192 10192 16/11/12 12:01 16/11/12 12:06 0 2 MSIE 8 Windows NT 6.1 1600x900 11.4.402.287 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10193 10193 13/11/12 05:55 13/11/12 05:57 0 2 MSIE 9 Windows NT 6.0 1170x731 11.5.502.110 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10194 10194 12/11/12 15:59 12/11/12 16:01 0 2 Safari 5.0.6 Macintosh 1280x800 11.1.102 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10195 10195 16/11/12 12:08 16/11/12 12:08 0 2 Firefox 16 Windows NT 5.1 1280x768 10.3.183 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10196 10196 15/11/12 23:01 15/11/12 23:08 0 2 MSIE 9 Windows NT 6.1 1920x1080 11.5.502.110 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10197 10197 16/11/12 02:32 16/11/12 02:33 0 2 MSIE 9 Windows NT 6.1 1536x864 11.1.102.63 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10198 10198 16/11/12 13:07 16/11/12 13:10 0 2 MSIE 7 Windows NT 6.0 1280x800 10.0.45.2 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10199 10199 16/11/12 13:54 16/11/12 13:55 0 2 MSIE 9 Windows NT 6.1 1920x1080 11.5.502.110 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10200 10200 12/11/12 14:11 12/11/12 14:13 0 2 Chrome 15.0.900.2 Windows NT 6.1 1366x768 11.5.502 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10201 10201 21/11/12 02:04 21/11/12 02:11 0 2 MSIE 8 Windows NT 6.0 1280x800 11.1.102.55 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10202 10202 13/11/12 01:50 13/11/12 01:52 0 2 MSIE 9 Windows NT 6.1 1463x823 11.1.102.63 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10203 10203 20/11/12 22:41 20/11/12 22:47 0 2 MSIE 9 Windows NT 6.1 1536x864 11.4.402.287 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10204 10204 16/11/12 23:50 17/11/12 00:03 0 2 MSIE 8 Windows NT 5.1 1366x768 11.4.402.287 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10205 10205 13/11/12 07:11 13/11/12 07:12 0 2 MSIE 8 Windows NT 5.1 1280x1024 10.2.159.1 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10206 10206 13/11/12 00:27 13/11/12 00:29 0 2 MSIE 8 Windows NT 5.1 1024x768 11.1.102.63 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10207 10207 23/11/12 08:00 23/11/12 08:00 2 2 Chrome 23.0.1271.64 Windows NT 5.1 1024x768 11.5.31 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10208 10208 23/11/12 15:22 23/11/12 15:28 1 2 MSIE 9 Windows NT 6.1 960x540 10.3.181.23 ... 7 6 6 2 7 2 7 5 2 4

10209 rows × 161 columns


In [3]:
columns_description = df_raw.iloc[0].copy()
df = df_raw.drop(0, axis=0)

In [4]:
columns_description


Out[4]:
ID                                                                                            Unique ID for each participant
StartDate                                                                                                          StartDate
EndDate                                                                                                              EndDate
Response Status                                                            0=incomplete response, 1=complete response, 2=...
Participant Type                                                                                             1=ours, 2=panel
Q1_1_TEXT                                                                                          Browser Meta Info-Browser
Q1_2_TEXT                                                                                          Browser Meta Info-Version
Q1_3_TEXT                                                                                 Browser Meta Info-Operating System
Q1_4_TEXT                                                                                Browser Meta Info-Screen Resolution
Q1_5_TEXT                                                                                    Browser Meta Info-Flash Version
Q1_6_TEXT                                                                                     Browser Meta Info-Java Support
Q1_7_TEXT                                                                                       Browser Meta Info-User Agent
Q2                                                                         Do you own a mobile device? (e.g., phone or ta...
Q3_1_TEXT                                                                  What mobile device do you use?-Manufacturer na...
Q3_2_TEXT                                                                  What mobile device do you use?-Model name and ...
Do you use apps? (based on all app related questions)                                       0=Did not answer Q3, 1=Yes, 2=No
Q4                                                                         Which app store do you use?-1=Apple iOS App St...
For those who answered Q4, do they know which app store they are using?    Does the respondent know which app store he/sh...
Q5                                                                         How frequently do you visit the app store to l...
Q6                                                                         On average, how many apps do you download a mo...
Q7_1                                                                       When do you look for apps? (please select all ...
Q7_2                                                                       When do you look for apps? (please select all ...
Q7_3                                                                       When do you look for apps? (please select all ...
Q7_4                                                                       When do you look for apps? (please select all ...
Q7_5                                                                       When do you look for apps? (please select all ...
Q7_6                                                                       When do you look for apps? (please select all ...
Q8_1                                                                       How do you find apps? (please select all that ...
Q8_2                                                                       How do you find apps? (please select all that ...
Q8_3                                                                       How do you find apps? (please select all that ...
Q8_4                                                                       How do you find apps? (please select all that ...
                                                                                                 ...                        
Q22                                                                        What is your ethnicity?-1=Asian (please specif...
Q23                                                                        What is the highest level of education you hav...
Q24                                                                        How many years of education have you received?...
Q25                                                                        Do you have a disability?-1=Yes (please specif...
Q26                                                                        What is your current employment status?-1=Full...
Q27                                                                        What is your current or most recent occupation...
Q28                                                                        What currency is your household income in?-1=A...
Q29.1                                                                      What is your annual household income in Austra...
Q29.2                                                                      What is your annual household income in Brazil...
Q29.3                                                                      What is your annual household income in Britis...
Q29.4                                                                      What is your annual household income in Canadi...
Q29.5                                                                      What is your annual household income in Chines...
Q29.6                                                                      What is your annual household income in Euro (...
Q29.7                                                                      What is your annual household income in Indian...
Q29.8                                                                      What is your annual household income in Japane...
Q29.9                                                                      What is your annual household income in Mexica...
Q29.10                                                                     What is your annual household income in Russia...
Q29.11                                                                     What is your annual household income in South ...
Q29.12                                                                     What is your annual household income in US Dol...
Q29.13                                                                     What is your annual household income in the cu...
Q30_1                                                                      Your personality may influence the types of ap...
Q30_2                                                                      Your personality may influence the types of ap...
Q30_3                                                                      Your personality may influence the types of ap...
Q30_4                                                                      Your personality may influence the types of ap...
Q30_5                                                                      Your personality may influence the types of ap...
Q30_6                                                                      Your personality may influence the types of ap...
Q30_7                                                                      Your personality may influence the types of ap...
Q30_8                                                                      Your personality may influence the types of ap...
Q30_9                                                                      Your personality may influence the types of ap...
Q30_10                                                                     Your personality may influence the types of ap...
Name: 0, dtype: object

In [5]:
columns_description['Q4']


Out[5]:
"Which app store do you use?-1=Apple iOS App Store, 2=Blackberry App World, 3=Google Play / Android Market, 4=Nokia Ovi Store, 5=Samsung Application Store, 6=Windows Phone Marketplace, 7=None - my mobile device cannot run apps, 8=I don't know, 9=Other (please specify), 10=I don\xd5t use apps"

In [6]:
df['Q3_1_TEXT'][:100]


Out[6]:
1                NaN
2         Apple Inc.
3               iPad
4            Samsung
5              apple
6            I phone
7              Apple
8      Sony Ericsson
9              Apple
10             docmo
11               HTC
12           Samsung
13     Sony Ericsson
14               NaN
15               NaN
16           Samsung
17            iPhone
18     Sony Ericsson
19             Apple
20               HTC
21               HTC
22             Nokia
23           Samsung
24               NaN
25           Samsung
26               htc
27             Apple
28             NOKIA
29             Nokia
30               NaN
           ...      
71        nokia 1100
72           Android
73            Iphone
74                LG
75             Nokia
76            huawei
77             Nokia
78               NaN
79             Nokia
80           Samsung
81                LG
82           Samsung
83           samsung
84             Apple
85           Samsung
86              Sony
87             Apple
88           Samsung
89             Apple
90             apple
91             nokia
92           samsung
93             Nokia
94            iPhone
95               HTC
96            iPhone
97             Nokia
98             Apple
99               NaN
100          Samsung
Name: Q3_1_TEXT, dtype: object

In [7]:
df['Q3_1_TEXT'] = df['Q3_1_TEXT'].str.lower()
df['Q1_3_TEXT'] = df['Q1_3_TEXT'].str.lower()

In [8]:
def map_manufacturer(row):
    if row['Q3_1_TEXT'] in ['apple','apple inc.','i phone','iphone','appel']\
        or row['Q4'] == 1:
        return 'Apple'
    elif row['Q3_1_TEXT'] in ['samsung','lg']\
        or 'android' in str(row['Q1_3_TEXT'])\
        or row['Q4'] == 3:
        return 'Android'
    else:
        return 'Other'

df['Platform'] = df.apply(map_manufacturer, axis = 1)
df.head(50)


Out[8]:
ID StartDate EndDate Response Status Participant Type Q1_1_TEXT Q1_2_TEXT Q1_3_TEXT Q1_4_TEXT Q1_5_TEXT ... Q30_2 Q30_3 Q30_4 Q30_5 Q30_6 Q30_7 Q30_8 Q30_9 Q30_10 Platform
1 1 26/09/12 07:46 26/09/12 07:46 2 1 Chrome 21.0.1180.89 wow64 1280x800 11.3.31 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN Other
2 2 26/09/12 07:45 26/09/12 07:56 1 1 Safari iPhone 6 cpu iphone os 6_0 like mac os x 320x480 -1 ... 3 7 2 6 3 4 3 4 4 Apple
3 3 26/09/12 07:45 26/09/12 08:01 1 1 Safari 6 cpu os 6_0 like mac os x 768x1024 -1 ... 4 5 2 3 3 5 3 5 3 Other
4 4 26/09/12 16:58 26/09/12 17:05 1 1 Firefox 15.0.1 intel mac os x 10.6 1920x1200 11.4.402 ... 3 6 3 5 5 5 2 5 3 Android
5 5 27/09/12 04:16 27/09/12 04:24 1 1 Chrome 22.0.1229.79 intel mac os x 10_7_4 1280x800 11.4.402 ... 6 4 3 6 5 7 3 5 3 Apple
6 6 27/09/12 08:50 27/09/12 08:56 1 1 Chrome 21.0.1180.89 wow64 1920x1080 11.3.31 ... 2 6 2 6 4 3 2 5 2 Apple
7 7 28/09/12 07:34 28/09/12 07:52 1 1 Chrome 22.0.1229.79 macintosh 1280x800 11.4.402 ... 3 4 3 5 4 5 6 5 4 Apple
8 8 28/09/12 10:07 28/09/12 10:20 1 1 Firefox 15.0.1 windows nt 6.1 1366x768 11.4.402 ... 4 4 1 5 6 4 2 5 6 Other
9 9 28/09/12 11:32 28/09/12 11:40 1 1 Firefox 15.0.1 windows nt 6.1 1600x900 11.4.402 ... 3 4 6 6 5 4 5 4 6 Apple
10 10 28/09/12 12:22 28/09/12 12:46 3 1 Chrome 22.0.1229.91 windows nt 6.1 1366x768 11.3.31 ... 5 4 5 5 5 4 5 4 4 Other
11 11 28/09/12 13:23 28/09/12 13:35 1 1 Safari 4 android 2.3.5 800x1184 11.1.111 ... 3 5 4 4 6 7 7 6 6 Android
12 12 28/09/12 13:28 28/09/12 13:39 3 1 Safari iPhone 5,1 ipod 320x480 -1 ... 6 2 3 4 3 4 3 3 3 Android
13 13 28/09/12 11:00 02/10/12 06:45 1 1 Chrome 22.0.1229.79 windows nt 5.1 1920x1200 11.3.31 ... 3 5 4 5 3 5 5 5 3 Other
14 14 02/10/12 10:27 02/10/12 10:27 2 1 Chrome 22.0.1229.79 windows nt 5.1 1280x768 11.3.31 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN Other
15 15 02/10/12 16:34 02/10/12 16:34 2 1 Chrome 22.0.1229.79 windows nt 6.1 1600x900 11.3.31 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN Other
16 16 03/10/12 11:10 03/10/12 11:21 1 1 Firefox 3.5.16) linux x86_64 1080x1920 11.2.202 ... 5 3 7 4 6 6 5 3 4 Android
17 17 03/10/12 14:38 03/10/12 14:43 1 1 Chrome 22.0.1229.79 windows nt 6.1 1920x1080 11.3.31 ... 3 2 3 7 4 5 7 3 4 Apple
18 18 03/10/12 15:11 03/10/12 15:26 1 1 Firefox 15.0.1 windows nt 6.1 1600x900 11.4.402 ... 4 5 3 5 6 5 3 5 5 Other
19 19 03/10/12 15:45 03/10/12 15:51 1 1 Chrome 22.0.1229.79 windows nt 6.1 1920x1080 11.3.31 ... 6 2 7 7 3 4 7 1 4 Apple
20 20 03/10/12 15:50 03/10/12 16:00 1 1 Chrome 22.0.1229.79 windows nt 6.1 1366x768 11.3.31 ... 4 6 5 4 4 7 1 3 3 Other
21 21 03/10/12 16:02 03/10/12 16:09 1 1 Chrome 21.0.1180.90 macintosh 1920x1200 10.3.183 ... 3 6 5 5 6 6 5 4 4 Other
22 22 03/10/12 15:58 03/10/12 16:13 1 1 Firefox 15.0.1 linux x86_64 1920x1080 11.2.202 ... 4 5 5 6 6 3 2 5 3 Other
23 23 03/10/12 15:49 03/10/12 16:27 1 1 Firefox 15.0.1 ubuntu 1600x900 10.1.999.Gnash ... 7 6 1 7 5 6 1 6 1 Android
24 24 03/10/12 16:30 03/10/12 16:30 2 1 Chrome 22.0.1229.79 windows nt 6.1 1280x1024 11.3.31 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN Other
25 25 03/10/12 16:30 03/10/12 16:35 1 1 Safari 6.0.1 macintosh 1920x1200 11.4.402 ... 5 5 2 6 6 5 2 5 3 Android
26 26 03/10/12 16:30 03/10/12 16:37 1 1 Firefox 10.0.7 linux x86_64 1680x1050 11.0.1 ... 1 1 1 1 1 1 1 1 1 Other
27 27 03/10/12 16:28 03/10/12 16:37 1 1 Firefox 15.0.1 windows nt 6.1 1366x768 11.4.402 ... 5 5 3 6 5 6 5 4 4 Apple
28 28 03/10/12 16:30 03/10/12 16:37 1 1 MSIE 9 windows nt 6.1 1920x1200 11.4.402.278 ... 5 6 1 6 6 5 1 4 1 Other
29 29 03/10/12 16:33 03/10/12 16:37 3 1 Firefox 10.0.7 linux x86_64 2560x1600 11.2.202 ... 2 4 3 4 5 4 5 6 4 Other
30 30 03/10/12 16:37 03/10/12 16:38 2 1 MSIE 9 windows nt 6.1 1920x1200 11.4.402.278 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN Other
31 31 03/10/12 16:30 03/10/12 16:38 3 1 Firefox 11 windows nt 6.1 1680x1050 10.1.53 ... 7 6 2 5 6 7 1 3 5 Other
32 32 03/10/12 16:33 03/10/12 16:38 3 1 Chrome 18.0.1025.152 windows nt 6.1 1600x1200 11.2.202 ... 1 4 3 6 5 2 1 5 5 Android
33 33 03/10/12 16:30 03/10/12 16:40 1 1 Firefox 15.0.1 linux x86_64 1920x1200 11.1.102 ... 4 5 1 6 4 6 4 6 2 Android
34 34 03/10/12 16:32 03/10/12 16:41 1 1 Chrome 21.0.1180.89 macintosh 1280x800 11.4.402 ... 3 4 1 7 4 7 1 4 1 Other
35 35 03/10/12 16:36 03/10/12 16:42 3 1 MSIE 9 windows nt 6.1 1024x600 11.1.102.55 ... 4 7 1 6 1 2 1 5 1 Other
36 36 03/10/12 16:38 03/10/12 16:43 1 1 Chrome 22.0.1229.79 macintosh 1440x900 11.4.402 ... 5 4 4 5 6 5 2 6 5 Other
37 37 03/10/12 16:35 03/10/12 16:43 3 1 Safari 6.0.1 macintosh 1280x800 11.4.402 ... 5 5 3 6 4 6 3 6 3 Other
38 38 03/10/12 16:33 03/10/12 16:44 1 1 Firefox 11 ubuntu 1366x768 11.2.202 ... 4 6 1 7 5 6 2 7 4 Other
39 39 03/10/12 16:39 03/10/12 16:48 1 1 MSIE 9 windows nt 6.1 991x793 10.0.45.2 ... 6 7 5 7 6 2 1 7 2 Apple
40 40 03/10/12 16:44 03/10/12 16:53 1 1 Chrome 22.0.1229.79 windows nt 6.1 1680x1050 11.3.31 ... 6 1 1 5 5 4 1 6 3 Android
41 41 03/10/12 16:43 03/10/12 16:54 1 1 Firefox 15.0.1 windows nt 6.1 1680x1050 11.3.300 ... 5 5 3 4 7 3 2 5 6 Other
42 42 03/10/12 16:46 03/10/12 16:55 1 1 NaN NaN NaN NaN NaN ... 1 6 1 7 2 5 1 6 1 Android
43 43 03/10/12 16:50 03/10/12 17:02 3 1 Firefox 15.0.1 windows nt 6.1 1680x1050 -1 ... 1 5 1 6 4 7 2 4 1 Other
44 44 03/10/12 16:07 03/10/12 17:08 1 1 Chrome 15.0.866.0 windows nt 5.1 1024x576 11.4.402 ... 4 5 1 6 4 4 2 7 1 Android
45 45 03/10/12 17:04 03/10/12 17:16 1 1 Chrome 22.0.1229.79 macintosh 1680x1050 11.4.402 ... 6 6 5 5 4 6 2 4 5 Other
46 46 03/10/12 17:07 03/10/12 17:27 1 1 Safari 5.1.7 windows nt 6.1 1280x800 11.4.402 ... 3 7 5 5 3 4 3 5 3 Other
47 47 03/10/12 17:25 03/10/12 17:29 3 1 Firefox 10.0.7 linux x86_64 1680x1050 11.2.202 ... 2 2 2 3 2 6 2 5 2 Other
48 48 03/10/12 17:47 03/10/12 17:51 3 1 Firefox 11 ubuntu 1280x800 11.2.202 ... 4 5 5 6 6 6 3 6 2 Other
49 49 03/10/12 16:30 03/10/12 18:18 1 1 Chrome 22.0.1229.79 windows nt 6.1 1280x768 11.3.31 ... 4 6 6 6 7 4 2 6 2 Other
50 50 03/10/12 20:32 03/10/12 20:41 3 1 MSIE 9 windows nt 6.1 1600x900 10.0.32.18 ... 2 7 1 6 4 4 1 7 1 Other

50 rows × 162 columns


In [9]:
df['Platform'].value_counts().plot(kind='bar')


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x113353c10>

In [10]:
df[df['Platform' ] == 'Android'][['Platform','Q3_1_TEXT','Q1_3_TEXT', 'Q4']]


Out[10]:
Platform Q3_1_TEXT Q1_3_TEXT Q4
4 Android samsung intel mac os x 10.6 3
11 Android htc android 2.3.5 3
12 Android samsung ipod 7
16 Android samsung linux x86_64 3
23 Android samsung ubuntu 3
25 Android samsung macintosh 3
32 Android samsung windows nt 6.1 7
33 Android samsung linux x86_64 3
40 Android lg windows nt 6.1 3
42 Android samsung NaN 3
44 Android samsung windows nt 5.1 3
58 Android samsung windows nt 6.1 3
68 Android samsung linux x86_64 3
74 Android lg windows nt 6.1 3
80 Android samsung linux x86_64 3
81 Android lg windows nt 6.2 3
82 Android samsung linux x86_64 3
83 Android samsung windows nt 6.1 3
85 Android samsung windows nt 6.0 3
88 Android samsung android 2.3.3 3
92 Android samsung NaN 3
100 Android samsung windows nt 6.1 3
101 Android samsung windows nt 6.1 3
104 Android samsung windows nt 5.1 5
106 Android samsung android 2.3.3 3
107 Android lg ubuntu 3
109 Android samsung linux i686 on x86_64 3
110 Android samsung windows nt 6.1 3
111 Android samsung linux x86_64 3
113 Android samsung windows nt 6.0 3
... ... ... ... ...
9847 Android nec windows nt 5.1 3
9854 Android ___ android 2.3.3 3
9855 Android ____ android 2.3.4 3
9857 Android iida windows nt 6.1 3
9858 Android samsung windows nt 6.1 7
9866 Android ____ windows nt 5.1 3
9872 Android shape windows nt 6.1 3
9880 Android sharp android 2.3.4 3
9917 Android lg android 2.3.5 3
9919 Android sharp android 2.3.3 3
9925 Android sharp ipod 3
9930 Android _____ windows nt 6.1 3
9934 Android galapagos windows nt 6.1 3
9942 Android docomo ipod 3
9944 Android ____ macintosh 3
10017 Android ___ windows nt 6.1 3
10027 Android ___ windows nt 6.1 3
10060 Android sony ericsson windows nt 5.1 3
10071 Android htc windows nt 6.1 3
10073 Android ___ windows nt 6.1 3
10092 Android sony tablet windows nt 5.1 3
10112 Android ____ android 4.0.4 3
10124 Android ____ windows nt 6.0 3
10126 Android acer linux x86_64 3
10166 Android docomo windows nt 6.1 3
10168 Android toshiba windows nt 6.1 3
10169 Android _____ windows nt 6.1 3
10194 Android lg macintosh 9
10204 Android asus windows nt 5.1 3
10208 Android sh windows nt 6.1 3

2927 rows × 4 columns


In [11]:
question_features=['Q14_1','Q14_2','Q14_3','Q14_4','Q14_5','Q14_6','Q14_7','Q14_8','Q14_9','Q14_10','Q14_11','Q14_12','Q14_13','Q14_15','Q14_14']

In [12]:
df_selected = df[df['Platform']!='Other'][['Platform']+question_features].copy()

In [13]:
df_selected.describe()


Out[13]:
Platform Q14_1 Q14_2 Q14_3 Q14_4 Q14_5 Q14_6 Q14_7 Q14_8 Q14_9 Q14_10 Q14_11 Q14_12 Q14_13 Q14_15 Q14_14
count 4061 1147 1293 829 835 249 796 472 802 554 1073 1273 1001 1133 1445 76
unique 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
top Android 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
freq 2927 747 858 561 569 183 535 294 533 370 716 884 640 745 925 52

In [14]:
df_selected.fillna(0, inplace=True)
df_selected[question_features] = df_selected[question_features].astype(int)

In [15]:
#df_selected[question_features] = df_selected[question_features].apply(lambda x: x/x.sum(), axis=1)
df_selected.dropna(0, inplace=True) #nans in this case mean that the user did not answer any reason

In [16]:
df_selected.describe()


Out[16]:
Q14_1 Q14_2 Q14_3 Q14_4 Q14_5 Q14_6 Q14_7 Q14_8 Q14_9 Q14_10 Q14_11 Q14_12 Q14_13 Q14_15 Q14_14
count 4061.000000 4061.000000 4061.000000 4061.000000 4061.000000 4061.000000 4061.000000 4061.000000 4061.000000 4061.000000 4061.000000 4061.000000 4061.000000 4061.000000 4061.000000
mean 0.282443 0.318394 0.204137 0.205614 0.061315 0.196011 0.116228 0.197488 0.136420 0.264221 0.313470 0.246491 0.278995 0.355824 0.018715
std 0.450243 0.465911 0.403119 0.404200 0.239937 0.397026 0.320537 0.398153 0.343276 0.440972 0.463961 0.431021 0.448560 0.478821 0.135532
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
75% 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 0.000000 1.000000 1.000000 0.000000
max 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000

In [17]:
df_selected['Q14_2'].unique()


Out[17]:
array([1, 0])

In [18]:
df_mean = df_selected.groupby('Platform').mean().add_suffix('_portion')
df_mean


Out[18]:
Q14_1_portion Q14_2_portion Q14_3_portion Q14_4_portion Q14_5_portion Q14_6_portion Q14_7_portion Q14_8_portion Q14_9_portion Q14_10_portion Q14_11_portion Q14_12_portion Q14_13_portion Q14_15_portion Q14_14_portion
Platform
Android 0.263068 0.286642 0.185173 0.184831 0.053639 0.171848 0.096003 0.191664 0.126409 0.269559 0.278100 0.238811 0.257602 0.325589 0.022890
Apple 0.332451 0.400353 0.253086 0.259259 0.081129 0.258377 0.168430 0.212522 0.162257 0.250441 0.404762 0.266314 0.334215 0.433862 0.007937

In [19]:
df_count = df_selected.groupby('Platform').count().add_suffix('_count')
df_count


Out[19]:
Q14_1_count Q14_2_count Q14_3_count Q14_4_count Q14_5_count Q14_6_count Q14_7_count Q14_8_count Q14_9_count Q14_10_count Q14_11_count Q14_12_count Q14_13_count Q14_15_count Q14_14_count
Platform
Android 2927 2927 2927 2927 2927 2927 2927 2927 2927 2927 2927 2927 2927 2927 2927
Apple 1134 1134 1134 1134 1134 1134 1134 1134 1134 1134 1134 1134 1134 1134 1134

In [20]:
df_agg = pandas.merge(df_mean, df_count, left_index=True, right_index=True)
df_agg


Out[20]:
Q14_1_portion Q14_2_portion Q14_3_portion Q14_4_portion Q14_5_portion Q14_6_portion Q14_7_portion Q14_8_portion Q14_9_portion Q14_10_portion ... Q14_6_count Q14_7_count Q14_8_count Q14_9_count Q14_10_count Q14_11_count Q14_12_count Q14_13_count Q14_15_count Q14_14_count
Platform
Android 0.263068 0.286642 0.185173 0.184831 0.053639 0.171848 0.096003 0.191664 0.126409 0.269559 ... 2927 2927 2927 2927 2927 2927 2927 2927 2927 2927
Apple 0.332451 0.400353 0.253086 0.259259 0.081129 0.258377 0.168430 0.212522 0.162257 0.250441 ... 1134 1134 1134 1134 1134 1134 1134 1134 1134 1134

2 rows × 30 columns


In [21]:
df_mean.to_csv('data/data_agg_user_dropoff.csv')

In [22]:
columns_description.to_json('data/data_columns_description.json')

In [23]:
df_selected.to_csv('data/data_user_dropoff.csv')

In [24]:
df_dimple = pandas.DataFrame(columns=['platform','value','answer'])
platforms = ['Android', 'Apple']
for column in df_mean.columns:
    for platform in platforms:
        value = df_mean.loc[platform, column]
        print ({'platform':platform, 'value': value, 'answer': column})
        df_dimple =  df_dimple.append({'platform':platform, 'value': value, 'answer': column}, ignore_index=True)


{'answer': 'Q14_1_portion', 'platform': 'Android', 'value': 0.26306798770071743}
{'answer': 'Q14_1_portion', 'platform': 'Apple', 'value': 0.33245149911816579}
{'answer': 'Q14_2_portion', 'platform': 'Android', 'value': 0.28664161257259996}
{'answer': 'Q14_2_portion', 'platform': 'Apple', 'value': 0.400352733686067}
{'answer': 'Q14_3_portion', 'platform': 'Android', 'value': 0.18517253160232319}
{'answer': 'Q14_3_portion', 'platform': 'Apple', 'value': 0.25308641975308643}
{'answer': 'Q14_4_portion', 'platform': 'Android', 'value': 0.18483088486504953}
{'answer': 'Q14_4_portion', 'platform': 'Apple', 'value': 0.25925925925925924}
{'answer': 'Q14_5_portion', 'platform': 'Android', 'value': 0.05363853775196447}
{'answer': 'Q14_5_portion', 'platform': 'Apple', 'value': 0.081128747795414458}
{'answer': 'Q14_6_portion', 'platform': 'Android', 'value': 0.17184830884865049}
{'answer': 'Q14_6_portion', 'platform': 'Apple', 'value': 0.25837742504409172}
{'answer': 'Q14_7_portion', 'platform': 'Android', 'value': 0.096002733173898194}
{'answer': 'Q14_7_portion', 'platform': 'Apple', 'value': 0.16843033509700175}
{'answer': 'Q14_8_portion', 'platform': 'Android', 'value': 0.19166381961052273}
{'answer': 'Q14_8_portion', 'platform': 'Apple', 'value': 0.21252204585537918}
{'answer': 'Q14_9_portion', 'platform': 'Android', 'value': 0.12640929279125385}
{'answer': 'Q14_9_portion', 'platform': 'Apple', 'value': 0.16225749559082892}
{'answer': 'Q14_10_portion', 'platform': 'Android', 'value': 0.26955927570891697}
{'answer': 'Q14_10_portion', 'platform': 'Apple', 'value': 0.25044091710758376}
{'answer': 'Q14_11_portion', 'platform': 'Android', 'value': 0.27810044414075846}
{'answer': 'Q14_11_portion', 'platform': 'Apple', 'value': 0.40476190476190477}
{'answer': 'Q14_12_portion', 'platform': 'Android', 'value': 0.23881106935428767}
{'answer': 'Q14_12_portion', 'platform': 'Apple', 'value': 0.26631393298059963}
{'answer': 'Q14_13_portion', 'platform': 'Android', 'value': 0.25760163990433893}
{'answer': 'Q14_13_portion', 'platform': 'Apple', 'value': 0.33421516754850089}
{'answer': 'Q14_15_portion', 'platform': 'Android', 'value': 0.32558934062179706}
{'answer': 'Q14_15_portion', 'platform': 'Apple', 'value': 0.43386243386243384}
{'answer': 'Q14_14_portion', 'platform': 'Android', 'value': 0.022890331397335154}
{'answer': 'Q14_14_portion', 'platform': 'Apple', 'value': 0.0079365079365079361}

In [25]:
df_dimple.to_csv('data/data_dimple_agg_user_dropoff.csv')

In [26]:
df_dimple


Out[26]:
platform value answer
0 Android 0.263068 Q14_1_portion
1 Apple 0.332451 Q14_1_portion
2 Android 0.286642 Q14_2_portion
3 Apple 0.400353 Q14_2_portion
4 Android 0.185173 Q14_3_portion
5 Apple 0.253086 Q14_3_portion
6 Android 0.184831 Q14_4_portion
7 Apple 0.259259 Q14_4_portion
8 Android 0.053639 Q14_5_portion
9 Apple 0.081129 Q14_5_portion
10 Android 0.171848 Q14_6_portion
11 Apple 0.258377 Q14_6_portion
12 Android 0.096003 Q14_7_portion
13 Apple 0.168430 Q14_7_portion
14 Android 0.191664 Q14_8_portion
15 Apple 0.212522 Q14_8_portion
16 Android 0.126409 Q14_9_portion
17 Apple 0.162257 Q14_9_portion
18 Android 0.269559 Q14_10_portion
19 Apple 0.250441 Q14_10_portion
20 Android 0.278100 Q14_11_portion
21 Apple 0.404762 Q14_11_portion
22 Android 0.238811 Q14_12_portion
23 Apple 0.266314 Q14_12_portion
24 Android 0.257602 Q14_13_portion
25 Apple 0.334215 Q14_13_portion
26 Android 0.325589 Q14_15_portion
27 Apple 0.433862 Q14_15_portion
28 Android 0.022890 Q14_14_portion
29 Apple 0.007937 Q14_14_portion

In [27]:
answer = 'Q14_1_portion'
columns_description[answer[:-8]][65:]


Out[27]:
'It crashes.'

In [28]:
columns_description[answer[:-8]]


Out[28]:
'What makes you stop using an app? (please select all that apply)-It crashes.'

In [29]:
answer[:-8]


Out[29]:
'Q14_1'

In [30]:
df_dimple['answer'] = df_dimple['answer'].map(lambda answer: columns_description[answer[:-8]][65:])

In [36]:
df_dimple[df_dimple['platform']=='Android']['value'].sum()


Out[36]:
2.9518278100444144

In [44]:
scale = lambda x: x/x.sum()
df_dimple_transformed = df_dimple.groupby('platform').transform(scale)
df_dimple_transformed


Out[44]:
value
0 0.089120
1 0.086906
2 0.097106
3 0.104657
4 0.062731
5 0.066160
6 0.062616
7 0.067773
8 0.018171
9 0.021208
10 0.058218
11 0.067543
12 0.032523
13 0.044030
14 0.064931
15 0.055556
16 0.042824
17 0.042416
18 0.091319
19 0.065468
20 0.094213
21 0.105809
22 0.080903
23 0.069617
24 0.087269
25 0.087367
26 0.110301
27 0.113416
28 0.007755
29 0.002075

In [45]:
df_dimple['value'] = df_dimple_transformed['value']

In [46]:
df_dimple


Out[46]:
platform value answer
0 Android 0.089120 It crashes.
1 Apple 0.086906 It crashes.
2 Android 0.097106 I found better alternatives.
3 Apple 0.104657 I found better alternatives.
4 Android 0.062731 The advertisements are annoying.
5 Apple 0.066160 The advertisements are annoying.
6 Android 0.062616 It is difficult to use.
7 Apple 0.067773 It is difficult to use.
8 Android 0.018171 It is no longer used by my friends and/or family.
9 Apple 0.021208 It is no longer used by my friends and/or family.
10 Android 0.058218 I need to pay extra for the features I need.
11 Apple 0.067543 I need to pay extra for the features I need.
12 Android 0.032523 I forgot about the app.
13 Apple 0.044030 I forgot about the app.
14 Android 0.064931 I do not need the features it provides.
15 Apple 0.055556 I do not need the features it provides.
16 Android 0.042824 It invades my privacy.
17 Apple 0.042416 It invades my privacy.
18 Android 0.091319 It is too slow.
19 Apple 0.065468 It is too slow.
20 Android 0.094213 I got bored of it.
21 Apple 0.105809 I got bored of it.
22 Android 0.080903 It does not work.
23 Apple 0.069617 It does not work.
24 Android 0.087269 It does not have the features I hoped for.
25 Apple 0.087367 It does not have the features I hoped for.
26 Android 0.110301 I don't need it anymore.
27 Apple 0.113416 I don't need it anymore.
28 Android 0.007755 Other (please specify)
29 Apple 0.002075 Other (please specify)

In [47]:
df_dimple.to_csv('data/data_dimple_agg_user_dropoff.csv')

In [ ]:


In [ ]: