In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [0]:
churn = pd.read_csv("telecom_churn_data.csv")
In [452]:
churn.head(10)
Out[452]:
mobile_number
circle_id
loc_og_t2o_mou
std_og_t2o_mou
loc_ic_t2o_mou
last_date_of_month_6
last_date_of_month_7
last_date_of_month_8
last_date_of_month_9
arpu_6
arpu_7
arpu_8
arpu_9
onnet_mou_6
onnet_mou_7
onnet_mou_8
onnet_mou_9
offnet_mou_6
offnet_mou_7
offnet_mou_8
offnet_mou_9
roam_ic_mou_6
roam_ic_mou_7
roam_ic_mou_8
roam_ic_mou_9
roam_og_mou_6
roam_og_mou_7
roam_og_mou_8
roam_og_mou_9
loc_og_t2t_mou_6
loc_og_t2t_mou_7
loc_og_t2t_mou_8
loc_og_t2t_mou_9
loc_og_t2m_mou_6
loc_og_t2m_mou_7
loc_og_t2m_mou_8
loc_og_t2m_mou_9
loc_og_t2f_mou_6
loc_og_t2f_mou_7
loc_og_t2f_mou_8
loc_og_t2f_mou_9
loc_og_t2c_mou_6
loc_og_t2c_mou_7
loc_og_t2c_mou_8
loc_og_t2c_mou_9
loc_og_mou_6
loc_og_mou_7
loc_og_mou_8
loc_og_mou_9
std_og_t2t_mou_6
std_og_t2t_mou_7
std_og_t2t_mou_8
std_og_t2t_mou_9
std_og_t2m_mou_6
std_og_t2m_mou_7
std_og_t2m_mou_8
std_og_t2m_mou_9
std_og_t2f_mou_6
std_og_t2f_mou_7
std_og_t2f_mou_8
std_og_t2f_mou_9
std_og_t2c_mou_6
std_og_t2c_mou_7
std_og_t2c_mou_8
std_og_t2c_mou_9
std_og_mou_6
std_og_mou_7
std_og_mou_8
std_og_mou_9
isd_og_mou_6
isd_og_mou_7
isd_og_mou_8
isd_og_mou_9
spl_og_mou_6
spl_og_mou_7
spl_og_mou_8
spl_og_mou_9
og_others_6
og_others_7
og_others_8
og_others_9
total_og_mou_6
total_og_mou_7
total_og_mou_8
total_og_mou_9
loc_ic_t2t_mou_6
loc_ic_t2t_mou_7
loc_ic_t2t_mou_8
loc_ic_t2t_mou_9
loc_ic_t2m_mou_6
loc_ic_t2m_mou_7
loc_ic_t2m_mou_8
loc_ic_t2m_mou_9
loc_ic_t2f_mou_6
loc_ic_t2f_mou_7
loc_ic_t2f_mou_8
loc_ic_t2f_mou_9
loc_ic_mou_6
loc_ic_mou_7
loc_ic_mou_8
loc_ic_mou_9
std_ic_t2t_mou_6
std_ic_t2t_mou_7
std_ic_t2t_mou_8
std_ic_t2t_mou_9
std_ic_t2m_mou_6
std_ic_t2m_mou_7
std_ic_t2m_mou_8
std_ic_t2m_mou_9
std_ic_t2f_mou_6
std_ic_t2f_mou_7
std_ic_t2f_mou_8
std_ic_t2f_mou_9
std_ic_t2o_mou_6
std_ic_t2o_mou_7
std_ic_t2o_mou_8
std_ic_t2o_mou_9
std_ic_mou_6
std_ic_mou_7
std_ic_mou_8
std_ic_mou_9
total_ic_mou_6
total_ic_mou_7
total_ic_mou_8
total_ic_mou_9
spl_ic_mou_6
spl_ic_mou_7
spl_ic_mou_8
spl_ic_mou_9
isd_ic_mou_6
isd_ic_mou_7
isd_ic_mou_8
isd_ic_mou_9
ic_others_6
ic_others_7
ic_others_8
ic_others_9
total_rech_num_6
total_rech_num_7
total_rech_num_8
total_rech_num_9
total_rech_amt_6
total_rech_amt_7
total_rech_amt_8
total_rech_amt_9
max_rech_amt_6
max_rech_amt_7
max_rech_amt_8
max_rech_amt_9
date_of_last_rech_6
date_of_last_rech_7
date_of_last_rech_8
date_of_last_rech_9
last_day_rch_amt_6
last_day_rch_amt_7
last_day_rch_amt_8
last_day_rch_amt_9
date_of_last_rech_data_6
date_of_last_rech_data_7
date_of_last_rech_data_8
date_of_last_rech_data_9
total_rech_data_6
total_rech_data_7
total_rech_data_8
total_rech_data_9
max_rech_data_6
max_rech_data_7
max_rech_data_8
max_rech_data_9
count_rech_2g_6
count_rech_2g_7
count_rech_2g_8
count_rech_2g_9
count_rech_3g_6
count_rech_3g_7
count_rech_3g_8
count_rech_3g_9
av_rech_amt_data_6
av_rech_amt_data_7
av_rech_amt_data_8
av_rech_amt_data_9
vol_2g_mb_6
vol_2g_mb_7
vol_2g_mb_8
vol_2g_mb_9
vol_3g_mb_6
vol_3g_mb_7
vol_3g_mb_8
vol_3g_mb_9
arpu_3g_6
arpu_3g_7
arpu_3g_8
arpu_3g_9
arpu_2g_6
arpu_2g_7
arpu_2g_8
arpu_2g_9
night_pck_user_6
night_pck_user_7
night_pck_user_8
night_pck_user_9
monthly_2g_6
monthly_2g_7
monthly_2g_8
monthly_2g_9
sachet_2g_6
sachet_2g_7
sachet_2g_8
sachet_2g_9
monthly_3g_6
monthly_3g_7
monthly_3g_8
monthly_3g_9
sachet_3g_6
sachet_3g_7
sachet_3g_8
sachet_3g_9
fb_user_6
fb_user_7
fb_user_8
fb_user_9
aon
aug_vbc_3g
jul_vbc_3g
jun_vbc_3g
sep_vbc_3g
0
7000842753
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
197.385
214.816
213.803
21.100
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.0
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.0
NaN
0.00
0.00
0.00
0.00
NaN
NaN
0.16
NaN
NaN
NaN
4.13
NaN
NaN
NaN
1.15
NaN
NaN
NaN
5.44
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.0
NaN
NaN
NaN
0.00
NaN
0.00
0.00
5.44
0.00
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
4
3
2
6
362
252
252
0
252
252
252
0
6/21/2014
7/16/2014
8/8/2014
9/28/2014
252
252
252
0
6/21/2014
7/16/2014
8/8/2014
NaN
1.0
1.0
1.0
NaN
252.0
252.0
252.0
NaN
0.0
0.0
0.0
NaN
1.0
1.0
1.0
NaN
252.0
252.0
252.0
NaN
30.13
1.32
5.75
0.0
83.57
150.76
109.61
0.00
212.17
212.17
212.17
NaN
212.17
212.17
212.17
NaN
0.0
0.0
0.0
NaN
0
0
0
0
0
0
0
0
1
1
1
0
0
0
0
0
1.0
1.0
1.0
NaN
968
30.40
0.00
101.20
3.58
1
7001865778
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
34.047
355.074
268.321
86.285
24.11
78.68
7.68
18.34
15.74
99.84
304.76
53.76
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
23.88
74.56
7.68
18.34
11.51
75.94
291.86
53.76
0.00
0.00
0.00
0.00
0.00
2.91
0.00
0.00
35.39
150.51
299.54
72.11
0.23
4.11
0.00
0.00
0.00
0.46
0.13
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
0.23
4.58
0.13
0.00
0.0
0.00
0.00
0.0
4.68
23.43
12.76
0.00
0.00
0.0
0.0
0.00
40.31
178.53
312.44
72.11
1.61
29.91
29.23
116.09
17.48
65.38
375.58
56.93
0.00
8.93
3.61
0.00
19.09
104.23
408.43
173.03
0.00
0.00
2.35
0.00
5.90
0.00
12.49
15.01
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
5.90
0.00
14.84
15.01
26.83
104.23
423.28
188.04
0.00
0.00
0.00
0.00
1.83
0.00
0.00
0.00
0.00
0.00
0.00
0.00
4
9
11
5
74
384
283
121
44
154
65
50
6/29/2014
7/31/2014
8/28/2014
9/30/2014
44
23
30
0
NaN
7/25/2014
8/10/2014
NaN
NaN
1.0
2.0
NaN
NaN
154.0
25.0
NaN
NaN
1.0
2.0
NaN
NaN
0.0
0.0
NaN
NaN
154.0
50.0
NaN
0.00
108.07
365.47
0.0
0.00
0.00
0.00
0.00
NaN
0.00
0.00
NaN
NaN
28.61
7.60
NaN
NaN
0.0
0.0
NaN
0
1
0
0
0
0
2
0
0
0
0
0
0
0
0
0
NaN
1.0
1.0
NaN
1006
0.00
0.00
0.00
0.00
2
7001625959
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
167.690
189.058
210.226
290.714
11.54
55.24
37.26
74.81
143.33
220.59
208.36
118.91
0.00
0.00
0.00
38.49
0.00
0.00
0.00
70.94
7.19
28.74
13.58
14.39
29.34
16.86
38.46
28.16
24.11
21.79
15.61
22.24
0.00
135.54
45.76
0.48
60.66
67.41
67.66
64.81
4.34
26.49
22.58
8.76
41.81
67.41
75.53
9.28
1.48
14.76
22.83
0.00
0.0
0.0
0.0
0.0
47.64
108.68
120.94
18.04
0.0
0.00
0.00
0.0
46.56
236.84
96.84
42.08
0.45
0.0
0.0
0.00
155.33
412.94
285.46
124.94
115.69
71.11
67.46
148.23
14.38
15.44
38.89
38.98
99.48
122.29
49.63
158.19
229.56
208.86
155.99
345.41
72.41
71.29
28.69
49.44
45.18
177.01
167.09
118.18
21.73
58.34
43.23
3.86
0.0
0.0
0.0
0.0
139.33
306.66
239.03
171.49
370.04
519.53
395.03
517.74
0.21
0.00
0.00
0.45
0.00
0.85
0.00
0.01
0.93
3.14
0.00
0.36
5
4
2
7
168
315
116
358
86
200
86
100
6/17/2014
7/24/2014
8/14/2014
9/29/2014
0
200
86
0
NaN
NaN
NaN
9/17/2014
NaN
NaN
NaN
1.0
NaN
NaN
NaN
46.0
NaN
NaN
NaN
1.0
NaN
NaN
NaN
0.0
NaN
NaN
NaN
46.0
0.00
0.00
0.00
0.0
0.00
0.00
0.00
8.42
NaN
NaN
NaN
2.84
NaN
NaN
NaN
0.0
NaN
NaN
NaN
0.0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
NaN
NaN
NaN
1.0
1103
0.00
0.00
4.17
0.00
3
7001204172
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
221.338
251.102
508.054
389.500
99.91
54.39
310.98
241.71
123.31
109.01
71.68
113.54
0.00
54.86
44.38
0.00
0.00
28.09
39.04
0.00
73.68
34.81
10.61
15.49
107.43
83.21
22.46
65.46
1.91
0.65
4.91
2.06
0.00
0.00
0.00
0.00
183.03
118.68
37.99
83.03
26.23
14.89
289.58
226.21
2.99
1.73
6.53
9.99
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
29.23
16.63
296.11
236.21
0.0
0.00
0.00
0.0
10.96
0.00
18.09
43.29
0.00
0.0
0.0
0.00
223.23
135.31
352.21
362.54
62.08
19.98
8.04
41.73
113.96
64.51
20.28
52.86
57.43
27.09
19.84
65.59
233.48
111.59
48.18
160.19
43.48
66.44
0.00
129.84
1.33
38.56
4.94
13.98
1.18
0.00
0.00
0.00
0.0
0.0
0.0
0.0
45.99
105.01
4.94
143.83
280.08
216.61
53.13
305.38
0.59
0.00
0.00
0.55
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.80
10
11
18
14
230
310
601
410
60
50
50
50
6/28/2014
7/31/2014
8/31/2014
9/30/2014
30
50
50
30
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0.00
0.00
0.00
0.0
0.00
0.00
0.00
0.00
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
NaN
NaN
NaN
NaN
2491
0.00
0.00
0.00
0.00
4
7000142493
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
261.636
309.876
238.174
163.426
50.31
149.44
83.89
58.78
76.96
91.88
124.26
45.81
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
50.31
149.44
83.89
58.78
67.64
91.88
124.26
37.89
0.00
0.00
0.00
1.93
0.00
0.00
0.00
0.00
117.96
241.33
208.16
98.61
0.00
0.00
0.00
0.00
9.31
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
9.31
0.00
0.00
0.00
0.0
0.00
0.00
0.0
0.00
0.00
0.00
5.98
0.00
0.0
0.0
0.00
127.28
241.33
208.16
104.59
105.68
88.49
233.81
154.56
106.84
109.54
104.13
48.24
1.50
0.00
0.00
0.00
214.03
198.04
337.94
202.81
0.00
0.00
0.86
2.31
1.93
0.25
0.00
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
1.93
0.25
0.86
2.31
216.44
198.29
338.81
205.31
0.00
0.00
0.00
0.18
0.00
0.00
0.00
0.00
0.48
0.00
0.00
0.00
5
6
3
4
196
350
287
200
56
110
110
50
6/26/2014
7/28/2014
8/9/2014
9/28/2014
50
110
110
50
6/4/2014
NaN
NaN
NaN
1.0
NaN
NaN
NaN
56.0
NaN
NaN
NaN
1.0
NaN
NaN
NaN
0.0
NaN
NaN
NaN
56.0
NaN
NaN
NaN
0.00
0.00
0.00
0.0
0.00
0.00
0.00
0.00
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.0
NaN
NaN
NaN
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0.0
NaN
NaN
NaN
1526
0.00
0.00
0.00
0.00
5
7000286308
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
50.258
58.810
83.386
170.826
50.16
43.63
85.48
138.79
19.28
13.44
14.46
46.91
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
50.16
43.63
85.48
138.79
16.39
8.83
12.38
44.78
0.00
0.00
0.00
2.13
0.00
0.00
0.00
0.00
66.56
52.46
97.86
185.71
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
2.88
4.61
2.08
0.00
0.0
0.0
0.0
0.0
2.88
4.61
2.08
0.00
0.0
0.00
0.00
0.0
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.00
69.44
57.08
99.94
185.71
28.73
30.03
56.26
68.38
49.19
57.44
62.46
84.01
0.00
0.00
0.00
0.00
77.93
87.48
118.73
152.39
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
77.03
71.06
37.93
52.03
0.0
0.0
0.0
0.0
77.03
71.06
37.93
52.03
155.39
158.76
157.13
205.39
0.43
0.21
0.23
0.53
0.00
0.00
0.00
0.00
0.00
0.00
0.23
0.43
2
2
3
3
120
0
130
130
120
0
130
130
6/19/2014
7/17/2014
8/24/2014
9/28/2014
120
0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0.00
0.00
0.00
0.0
0.00
0.00
0.00
0.00
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
NaN
NaN
NaN
NaN
1471
0.00
0.00
0.00
0.00
6
7001051193
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
429.023
190.704
255.114
114.751
71.03
45.03
76.66
15.23
262.73
49.24
92.08
50.33
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
71.03
45.03
76.14
15.23
252.23
48.71
80.63
50.33
10.38
0.00
0.00
0.00
0.11
0.00
0.00
0.00
333.64
93.74
156.78
65.56
0.00
0.00
0.51
0.00
0.00
0.53
11.45
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
0.00
0.53
11.96
0.00
0.0
0.00
0.00
0.0
0.11
0.53
0.00
0.00
0.00
0.0
0.0
0.35
333.76
94.81
168.74
65.91
1857.99
1427.04
1896.43
2334.88
248.64
336.96
265.28
231.41
20.24
22.69
2.51
6.19
2126.89
1786.71
2164.23
2572.49
0.00
0.00
0.00
0.00
1.39
0.76
2.60
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
1.39
0.76
2.60
0.00
2128.41
1788.06
2167.11
2572.49
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.11
0.58
0.28
0.00
15
10
11
7
499
222
294
141
90
37
50
30
6/28/2014
7/31/2014
8/28/2014
9/28/2014
37
24
10
24
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0.00
0.00
0.00
0.0
0.00
0.00
0.00
0.00
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
NaN
NaN
NaN
NaN
1673
0.00
0.00
0.00
0.00
7
7000701601
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
1069.180
1349.850
3171.480
500.000
57.84
54.68
52.29
NaN
453.43
567.16
325.91
NaN
16.23
33.49
31.64
NaN
23.74
12.59
38.06
NaN
51.39
31.38
40.28
NaN
308.63
447.38
162.28
NaN
62.13
55.14
53.23
NaN
0.00
0.00
0.00
NaN
422.16
533.91
255.79
NaN
4.30
23.29
12.01
NaN
49.89
31.76
49.14
NaN
6.66
20.08
16.68
NaN
0.0
0.0
0.0
NaN
60.86
75.14
77.84
NaN
0.0
0.18
10.01
NaN
4.50
0.00
6.50
NaN
0.00
0.0
0.0
NaN
487.53
609.24
350.16
0.00
58.14
32.26
27.31
NaN
217.56
221.49
121.19
NaN
152.16
101.46
39.53
NaN
427.88
355.23
188.04
NaN
36.89
11.83
30.39
NaN
91.44
126.99
141.33
NaN
52.19
34.24
22.21
NaN
0.0
0.0
0.0
NaN
180.54
173.08
193.94
NaN
626.46
558.04
428.74
0.00
0.21
0.00
0.00
NaN
2.06
14.53
31.59
NaN
15.74
15.19
15.14
NaN
5
5
7
3
1580
790
3638
0
1580
790
1580
0
6/27/2014
7/25/2014
8/26/2014
9/30/2014
0
0
779
0
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0.00
0.00
0.00
0.0
0.00
0.00
0.00
0.00
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
NaN
NaN
NaN
NaN
802
57.74
19.38
18.74
0.00
8
7001524846
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
378.721
492.223
137.362
166.787
413.69
351.03
35.08
33.46
94.66
80.63
136.48
108.71
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
297.13
217.59
12.49
26.13
80.96
70.58
50.54
34.58
0.00
0.00
0.00
0.00
0.00
0.00
7.15
0.00
378.09
288.18
63.04
60.71
116.56
133.43
22.58
7.33
13.69
10.04
75.69
74.13
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
130.26
143.48
98.28
81.46
0.0
0.00
0.00
0.0
0.00
0.00
10.23
0.00
0.00
0.0
0.0
0.00
508.36
431.66
171.56
142.18
23.84
9.84
0.31
4.03
57.58
13.98
15.48
17.34
0.00
0.00
0.00
0.00
81.43
23.83
15.79
21.38
0.00
0.58
0.10
0.00
22.43
4.08
0.65
13.53
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
22.43
4.66
0.75
13.53
103.86
28.49
16.54
34.91
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
19
21
14
15
437
601
120
186
90
154
30
36
6/25/2014
7/31/2014
8/30/2014
9/30/2014
50
0
10
0
NaN
7/31/2014
8/23/2014
NaN
NaN
2.0
3.0
NaN
NaN
154.0
23.0
NaN
NaN
2.0
3.0
NaN
NaN
0.0
0.0
NaN
NaN
177.0
69.0
NaN
0.00
356.00
0.03
0.0
0.00
750.95
11.94
0.00
NaN
0.00
19.83
NaN
NaN
0.00
0.00
NaN
NaN
0.0
0.0
NaN
0
1
0
0
0
1
3
0
0
0
0
0
0
0
0
0
NaN
1.0
1.0
NaN
315
21.03
910.65
122.16
0.00
9
7001864400
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
119.518
247.435
170.231
160.042
33.89
30.11
22.43
27.84
63.48
54.16
78.34
123.48
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
33.89
30.11
22.43
27.84
38.03
40.06
34.93
37.26
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
71.93
70.18
57.36
65.11
0.00
0.00
0.00
0.00
25.45
14.09
43.41
83.26
0.00
0.00
0.00
2.94
0.0
0.0
0.0
0.0
25.45
14.09
43.41
86.21
0.0
0.00
0.00
0.0
0.66
0.00
0.00
0.00
0.00
0.0
0.0
0.00
98.04
84.28
100.78
151.33
129.34
124.34
49.93
313.38
132.94
96.24
122.58
65.06
0.40
0.00
0.00
0.48
262.69
220.59
172.51
378.93
0.30
0.00
0.00
4.38
32.86
78.21
1.74
1.18
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
33.16
78.21
1.74
5.56
303.98
327.31
219.86
412.63
0.00
0.00
0.00
0.00
8.11
28.49
45.59
28.13
0.00
0.00
0.00
0.00
4
2
5
3
220
195
210
180
110
154
50
130
6/29/2014
7/23/2014
8/29/2014
9/20/2014
110
154
30
50
NaN
7/23/2014
NaN
NaN
NaN
1.0
NaN
NaN
NaN
154.0
NaN
NaN
NaN
1.0
NaN
NaN
NaN
0.0
NaN
NaN
NaN
154.0
NaN
NaN
0.00
7.37
0.00
0.0
0.00
0.00
0.00
0.00
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.0
NaN
NaN
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
NaN
1.0
NaN
NaN
902
0.00
0.00
0.00
0.00
In [0]:
#To increase the display width of the data (i.e. #columns,rows)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
In [454]:
churn.info(verbose=True)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99999 entries, 0 to 99998
Data columns (total 226 columns):
# Column Dtype
--- ------ -----
0 mobile_number int64
1 circle_id int64
2 loc_og_t2o_mou float64
3 std_og_t2o_mou float64
4 loc_ic_t2o_mou float64
5 last_date_of_month_6 object
6 last_date_of_month_7 object
7 last_date_of_month_8 object
8 last_date_of_month_9 object
9 arpu_6 float64
10 arpu_7 float64
11 arpu_8 float64
12 arpu_9 float64
13 onnet_mou_6 float64
14 onnet_mou_7 float64
15 onnet_mou_8 float64
16 onnet_mou_9 float64
17 offnet_mou_6 float64
18 offnet_mou_7 float64
19 offnet_mou_8 float64
20 offnet_mou_9 float64
21 roam_ic_mou_6 float64
22 roam_ic_mou_7 float64
23 roam_ic_mou_8 float64
24 roam_ic_mou_9 float64
25 roam_og_mou_6 float64
26 roam_og_mou_7 float64
27 roam_og_mou_8 float64
28 roam_og_mou_9 float64
29 loc_og_t2t_mou_6 float64
30 loc_og_t2t_mou_7 float64
31 loc_og_t2t_mou_8 float64
32 loc_og_t2t_mou_9 float64
33 loc_og_t2m_mou_6 float64
34 loc_og_t2m_mou_7 float64
35 loc_og_t2m_mou_8 float64
36 loc_og_t2m_mou_9 float64
37 loc_og_t2f_mou_6 float64
38 loc_og_t2f_mou_7 float64
39 loc_og_t2f_mou_8 float64
40 loc_og_t2f_mou_9 float64
41 loc_og_t2c_mou_6 float64
42 loc_og_t2c_mou_7 float64
43 loc_og_t2c_mou_8 float64
44 loc_og_t2c_mou_9 float64
45 loc_og_mou_6 float64
46 loc_og_mou_7 float64
47 loc_og_mou_8 float64
48 loc_og_mou_9 float64
49 std_og_t2t_mou_6 float64
50 std_og_t2t_mou_7 float64
51 std_og_t2t_mou_8 float64
52 std_og_t2t_mou_9 float64
53 std_og_t2m_mou_6 float64
54 std_og_t2m_mou_7 float64
55 std_og_t2m_mou_8 float64
56 std_og_t2m_mou_9 float64
57 std_og_t2f_mou_6 float64
58 std_og_t2f_mou_7 float64
59 std_og_t2f_mou_8 float64
60 std_og_t2f_mou_9 float64
61 std_og_t2c_mou_6 float64
62 std_og_t2c_mou_7 float64
63 std_og_t2c_mou_8 float64
64 std_og_t2c_mou_9 float64
65 std_og_mou_6 float64
66 std_og_mou_7 float64
67 std_og_mou_8 float64
68 std_og_mou_9 float64
69 isd_og_mou_6 float64
70 isd_og_mou_7 float64
71 isd_og_mou_8 float64
72 isd_og_mou_9 float64
73 spl_og_mou_6 float64
74 spl_og_mou_7 float64
75 spl_og_mou_8 float64
76 spl_og_mou_9 float64
77 og_others_6 float64
78 og_others_7 float64
79 og_others_8 float64
80 og_others_9 float64
81 total_og_mou_6 float64
82 total_og_mou_7 float64
83 total_og_mou_8 float64
84 total_og_mou_9 float64
85 loc_ic_t2t_mou_6 float64
86 loc_ic_t2t_mou_7 float64
87 loc_ic_t2t_mou_8 float64
88 loc_ic_t2t_mou_9 float64
89 loc_ic_t2m_mou_6 float64
90 loc_ic_t2m_mou_7 float64
91 loc_ic_t2m_mou_8 float64
92 loc_ic_t2m_mou_9 float64
93 loc_ic_t2f_mou_6 float64
94 loc_ic_t2f_mou_7 float64
95 loc_ic_t2f_mou_8 float64
96 loc_ic_t2f_mou_9 float64
97 loc_ic_mou_6 float64
98 loc_ic_mou_7 float64
99 loc_ic_mou_8 float64
100 loc_ic_mou_9 float64
101 std_ic_t2t_mou_6 float64
102 std_ic_t2t_mou_7 float64
103 std_ic_t2t_mou_8 float64
104 std_ic_t2t_mou_9 float64
105 std_ic_t2m_mou_6 float64
106 std_ic_t2m_mou_7 float64
107 std_ic_t2m_mou_8 float64
108 std_ic_t2m_mou_9 float64
109 std_ic_t2f_mou_6 float64
110 std_ic_t2f_mou_7 float64
111 std_ic_t2f_mou_8 float64
112 std_ic_t2f_mou_9 float64
113 std_ic_t2o_mou_6 float64
114 std_ic_t2o_mou_7 float64
115 std_ic_t2o_mou_8 float64
116 std_ic_t2o_mou_9 float64
117 std_ic_mou_6 float64
118 std_ic_mou_7 float64
119 std_ic_mou_8 float64
120 std_ic_mou_9 float64
121 total_ic_mou_6 float64
122 total_ic_mou_7 float64
123 total_ic_mou_8 float64
124 total_ic_mou_9 float64
125 spl_ic_mou_6 float64
126 spl_ic_mou_7 float64
127 spl_ic_mou_8 float64
128 spl_ic_mou_9 float64
129 isd_ic_mou_6 float64
130 isd_ic_mou_7 float64
131 isd_ic_mou_8 float64
132 isd_ic_mou_9 float64
133 ic_others_6 float64
134 ic_others_7 float64
135 ic_others_8 float64
136 ic_others_9 float64
137 total_rech_num_6 int64
138 total_rech_num_7 int64
139 total_rech_num_8 int64
140 total_rech_num_9 int64
141 total_rech_amt_6 int64
142 total_rech_amt_7 int64
143 total_rech_amt_8 int64
144 total_rech_amt_9 int64
145 max_rech_amt_6 int64
146 max_rech_amt_7 int64
147 max_rech_amt_8 int64
148 max_rech_amt_9 int64
149 date_of_last_rech_6 object
150 date_of_last_rech_7 object
151 date_of_last_rech_8 object
152 date_of_last_rech_9 object
153 last_day_rch_amt_6 int64
154 last_day_rch_amt_7 int64
155 last_day_rch_amt_8 int64
156 last_day_rch_amt_9 int64
157 date_of_last_rech_data_6 object
158 date_of_last_rech_data_7 object
159 date_of_last_rech_data_8 object
160 date_of_last_rech_data_9 object
161 total_rech_data_6 float64
162 total_rech_data_7 float64
163 total_rech_data_8 float64
164 total_rech_data_9 float64
165 max_rech_data_6 float64
166 max_rech_data_7 float64
167 max_rech_data_8 float64
168 max_rech_data_9 float64
169 count_rech_2g_6 float64
170 count_rech_2g_7 float64
171 count_rech_2g_8 float64
172 count_rech_2g_9 float64
173 count_rech_3g_6 float64
174 count_rech_3g_7 float64
175 count_rech_3g_8 float64
176 count_rech_3g_9 float64
177 av_rech_amt_data_6 float64
178 av_rech_amt_data_7 float64
179 av_rech_amt_data_8 float64
180 av_rech_amt_data_9 float64
181 vol_2g_mb_6 float64
182 vol_2g_mb_7 float64
183 vol_2g_mb_8 float64
184 vol_2g_mb_9 float64
185 vol_3g_mb_6 float64
186 vol_3g_mb_7 float64
187 vol_3g_mb_8 float64
188 vol_3g_mb_9 float64
189 arpu_3g_6 float64
190 arpu_3g_7 float64
191 arpu_3g_8 float64
192 arpu_3g_9 float64
193 arpu_2g_6 float64
194 arpu_2g_7 float64
195 arpu_2g_8 float64
196 arpu_2g_9 float64
197 night_pck_user_6 float64
198 night_pck_user_7 float64
199 night_pck_user_8 float64
200 night_pck_user_9 float64
201 monthly_2g_6 int64
202 monthly_2g_7 int64
203 monthly_2g_8 int64
204 monthly_2g_9 int64
205 sachet_2g_6 int64
206 sachet_2g_7 int64
207 sachet_2g_8 int64
208 sachet_2g_9 int64
209 monthly_3g_6 int64
210 monthly_3g_7 int64
211 monthly_3g_8 int64
212 monthly_3g_9 int64
213 sachet_3g_6 int64
214 sachet_3g_7 int64
215 sachet_3g_8 int64
216 sachet_3g_9 int64
217 fb_user_6 float64
218 fb_user_7 float64
219 fb_user_8 float64
220 fb_user_9 float64
221 aon int64
222 aug_vbc_3g float64
223 jul_vbc_3g float64
224 jun_vbc_3g float64
225 sep_vbc_3g float64
dtypes: float64(179), int64(35), object(12)
memory usage: 172.4+ MB
In [455]:
churn.info(verbose=False)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99999 entries, 0 to 99998
Columns: 226 entries, mobile_number to sep_vbc_3g
dtypes: float64(179), int64(35), object(12)
memory usage: 172.4+ MB
In [456]:
churn.describe(include=[np.number])
Out[456]:
mobile_number
circle_id
loc_og_t2o_mou
std_og_t2o_mou
loc_ic_t2o_mou
arpu_6
arpu_7
arpu_8
arpu_9
onnet_mou_6
onnet_mou_7
onnet_mou_8
onnet_mou_9
offnet_mou_6
offnet_mou_7
offnet_mou_8
offnet_mou_9
roam_ic_mou_6
roam_ic_mou_7
roam_ic_mou_8
roam_ic_mou_9
roam_og_mou_6
roam_og_mou_7
roam_og_mou_8
roam_og_mou_9
loc_og_t2t_mou_6
loc_og_t2t_mou_7
loc_og_t2t_mou_8
loc_og_t2t_mou_9
loc_og_t2m_mou_6
loc_og_t2m_mou_7
loc_og_t2m_mou_8
loc_og_t2m_mou_9
loc_og_t2f_mou_6
loc_og_t2f_mou_7
loc_og_t2f_mou_8
loc_og_t2f_mou_9
loc_og_t2c_mou_6
loc_og_t2c_mou_7
loc_og_t2c_mou_8
loc_og_t2c_mou_9
loc_og_mou_6
loc_og_mou_7
loc_og_mou_8
loc_og_mou_9
std_og_t2t_mou_6
std_og_t2t_mou_7
std_og_t2t_mou_8
std_og_t2t_mou_9
std_og_t2m_mou_6
std_og_t2m_mou_7
std_og_t2m_mou_8
std_og_t2m_mou_9
std_og_t2f_mou_6
std_og_t2f_mou_7
std_og_t2f_mou_8
std_og_t2f_mou_9
std_og_t2c_mou_6
std_og_t2c_mou_7
std_og_t2c_mou_8
std_og_t2c_mou_9
std_og_mou_6
std_og_mou_7
std_og_mou_8
std_og_mou_9
isd_og_mou_6
isd_og_mou_7
isd_og_mou_8
isd_og_mou_9
spl_og_mou_6
spl_og_mou_7
spl_og_mou_8
spl_og_mou_9
og_others_6
og_others_7
og_others_8
og_others_9
total_og_mou_6
total_og_mou_7
total_og_mou_8
total_og_mou_9
loc_ic_t2t_mou_6
loc_ic_t2t_mou_7
loc_ic_t2t_mou_8
loc_ic_t2t_mou_9
loc_ic_t2m_mou_6
loc_ic_t2m_mou_7
loc_ic_t2m_mou_8
loc_ic_t2m_mou_9
loc_ic_t2f_mou_6
loc_ic_t2f_mou_7
loc_ic_t2f_mou_8
loc_ic_t2f_mou_9
loc_ic_mou_6
loc_ic_mou_7
loc_ic_mou_8
loc_ic_mou_9
std_ic_t2t_mou_6
std_ic_t2t_mou_7
std_ic_t2t_mou_8
std_ic_t2t_mou_9
std_ic_t2m_mou_6
std_ic_t2m_mou_7
std_ic_t2m_mou_8
std_ic_t2m_mou_9
std_ic_t2f_mou_6
std_ic_t2f_mou_7
std_ic_t2f_mou_8
std_ic_t2f_mou_9
std_ic_t2o_mou_6
std_ic_t2o_mou_7
std_ic_t2o_mou_8
std_ic_t2o_mou_9
std_ic_mou_6
std_ic_mou_7
std_ic_mou_8
std_ic_mou_9
total_ic_mou_6
total_ic_mou_7
total_ic_mou_8
total_ic_mou_9
spl_ic_mou_6
spl_ic_mou_7
spl_ic_mou_8
spl_ic_mou_9
isd_ic_mou_6
isd_ic_mou_7
isd_ic_mou_8
isd_ic_mou_9
ic_others_6
ic_others_7
ic_others_8
ic_others_9
total_rech_num_6
total_rech_num_7
total_rech_num_8
total_rech_num_9
total_rech_amt_6
total_rech_amt_7
total_rech_amt_8
total_rech_amt_9
max_rech_amt_6
max_rech_amt_7
max_rech_amt_8
max_rech_amt_9
last_day_rch_amt_6
last_day_rch_amt_7
last_day_rch_amt_8
last_day_rch_amt_9
total_rech_data_6
total_rech_data_7
total_rech_data_8
total_rech_data_9
max_rech_data_6
max_rech_data_7
max_rech_data_8
max_rech_data_9
count_rech_2g_6
count_rech_2g_7
count_rech_2g_8
count_rech_2g_9
count_rech_3g_6
count_rech_3g_7
count_rech_3g_8
count_rech_3g_9
av_rech_amt_data_6
av_rech_amt_data_7
av_rech_amt_data_8
av_rech_amt_data_9
vol_2g_mb_6
vol_2g_mb_7
vol_2g_mb_8
vol_2g_mb_9
vol_3g_mb_6
vol_3g_mb_7
vol_3g_mb_8
vol_3g_mb_9
arpu_3g_6
arpu_3g_7
arpu_3g_8
arpu_3g_9
arpu_2g_6
arpu_2g_7
arpu_2g_8
arpu_2g_9
night_pck_user_6
night_pck_user_7
night_pck_user_8
night_pck_user_9
monthly_2g_6
monthly_2g_7
monthly_2g_8
monthly_2g_9
sachet_2g_6
sachet_2g_7
sachet_2g_8
sachet_2g_9
monthly_3g_6
monthly_3g_7
monthly_3g_8
monthly_3g_9
sachet_3g_6
sachet_3g_7
sachet_3g_8
sachet_3g_9
fb_user_6
fb_user_7
fb_user_8
fb_user_9
aon
aug_vbc_3g
jul_vbc_3g
jun_vbc_3g
sep_vbc_3g
count
9.999900e+04
99999.0
98981.0
98981.0
98981.0
99999.000000
99999.000000
99999.000000
99999.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.0
96140.0
94621.0
92254.0
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
99999.000000
99999.000000
99999.000000
99999.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.0
96140.0
94621.0
92254.0
96062.000000
96140.000000
94621.000000
92254.000000
99999.000000
99999.000000
99999.000000
99999.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
96062.000000
96140.000000
94621.000000
92254.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
25153.000000
25571.000000
26339.000000
25922.000000
25153.000000
25571.000000
26339.000000
25922.00000
25153.000000
25571.000000
26339.000000
25922.000000
25153.000000
25571.000000
26339.000000
25922.000000
25153.000000
25571.000000
26339.000000
25922.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
25153.000000
25571.000000
26339.000000
25922.000000
25153.000000
25571.000000
26339.000000
25922.000000
25153.000000
25571.000000
26339.000000
25922.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
25153.000000
25571.000000
26339.000000
25922.000000
99999.000000
99999.000000
99999.000000
99999.000000
99999.000000
mean
7.001207e+09
109.0
0.0
0.0
0.0
282.987358
278.536648
279.154731
261.645069
132.395875
133.670805
133.018098
130.302327
197.935577
197.045133
196.574803
190.337222
9.950013
7.149898
7.292981
6.343841
13.911337
9.818732
9.971890
8.555519
47.100763
46.473010
45.887806
44.584446
93.342088
91.397131
91.755128
90.463192
3.751013
3.792985
3.677991
3.655123
1.123056
1.368500
1.433821
1.232726
144.201175
141.670476
141.328209
138.709970
79.829870
83.299598
83.282673
82.342919
87.299624
90.804137
89.838390
86.276622
1.129011
1.115010
1.067792
1.042362
0.0
0.0
0.0
0.0
168.261218
175.221436
174.191498
169.664466
0.798277
0.776572
0.791247
0.723892
3.916811
4.978279
5.053769
4.412767
0.454157
0.030235
0.033372
0.047456
305.133424
310.231175
304.119513
289.279198
47.922365
47.990520
47.211362
46.281794
107.475650
107.120493
108.460515
106.155471
12.084305
12.599697
11.751834
12.173105
167.491059
167.719540
167.432575
164.619293
9.575993
10.011904
9.883921
9.432479
20.722240
21.656415
21.183211
19.620913
2.156397
2.216923
2.085004
2.173419
0.0
0.0
0.0
0.0
32.457179
33.887833
33.154735
31.229344
200.130037
202.853055
198.750783
189.214260
0.061557
0.033585
0.040361
0.163137
7.460608
8.334936
8.442001
8.063003
0.854656
1.012960
0.970800
1.017162
7.558806
7.700367
7.212912
6.893019
327.514615
322.962970
324.157122
303.345673
104.637486
104.752398
107.728207
101.943889
63.156252
59.385804
62.641716
43.901249
2.463802
2.666419
2.651999
2.441170
126.393392
126.729459
125.717301
124.94144
1.864668
2.044699
2.016288
1.781807
0.599133
0.621720
0.635711
0.659363
192.600982
200.981292
197.526489
192.734315
51.904956
51.229937
50.170154
44.719701
121.396219
128.995847
135.410689
136.056613
89.555057
89.384120
91.173849
100.264116
86.398003
85.914450
86.599478
93.712026
0.025086
0.023034
0.020844
0.015971
0.079641
0.083221
0.081001
0.068781
0.389384
0.439634
0.450075
0.393104
0.075921
0.078581
0.082941
0.086341
0.074781
0.080401
0.084501
0.084581
0.914404
0.908764
0.890808
0.860968
1219.854749
68.170248
66.839062
60.021204
3.299373
std
6.956694e+05
0.0
0.0
0.0
0.0
328.439770
338.156291
344.474791
341.998630
297.207406
308.794148
308.951589
308.477668
316.851613
325.862803
327.170662
319.396092
72.825411
73.447948
68.402466
57.137537
71.443196
58.455762
64.713221
58.438186
150.856393
155.318705
151.184830
147.995390
162.780544
157.492308
156.537048
158.681454
14.230438
14.264986
13.270996
13.457549
5.448946
7.533445
6.783335
5.619021
251.751489
248.731086
245.914311
245.934517
252.476533
263.631042
265.486090
267.184991
255.617850
269.347911
271.757783
261.407396
7.984970
8.599406
7.905971
8.261770
0.0
0.0
0.0
0.0
389.948499
408.922934
411.633049
405.138658
25.765248
25.603052
25.544471
21.310751
14.936449
20.661570
17.855111
16.328227
4.125911
2.161717
2.323464
3.635466
463.419481
480.031178
478.150031
468.980002
140.258485
145.795055
137.239552
140.130610
171.713903
169.423620
169.723759
165.492803
40.140895
42.977442
39.125379
43.840776
254.124029
256.242707
250.025523
249.845070
54.330607
57.411971
55.073186
53.376273
80.793414
86.521393
83.683565
74.913050
16.495594
16.454061
15.812580
15.978601
0.0
0.0
0.0
0.0
106.283386
113.720168
110.127008
101.982303
291.651671
298.124954
289.321094
284.823024
0.160920
0.155725
0.146147
0.527860
59.722948
65.219829
63.813098
63.505379
11.955164
12.673099
13.284348
12.381172
7.078405
7.070422
7.203753
7.096261
398.019701
408.114237
416.540455
404.588583
120.614894
124.523970
126.902505
125.375109
97.356649
95.915385
104.431816
90.809712
2.789128
3.031593
3.074987
2.516339
108.477235
109.765267
109.437851
111.36376
2.570254
2.768332
2.720132
2.214701
1.274428
1.394524
1.422827
1.411513
192.646318
196.791224
191.301305
188.400286
213.356445
212.302217
212.347892
198.653570
544.247227
541.494013
558.775335
577.394194
193.124653
195.893924
188.180936
216.291992
172.767523
176.379871
168.247852
171.384224
0.156391
0.150014
0.142863
0.125366
0.295058
0.304395
0.299568
0.278120
1.497320
1.636230
1.630263
1.347140
0.363371
0.387231
0.384947
0.384978
0.568344
0.628334
0.660234
0.650457
0.279772
0.287950
0.311885
0.345987
954.733842
267.580450
271.201856
253.938223
32.408353
min
7.000000e+09
109.0
0.0
0.0
0.0
-2258.709000
-2014.045000
-945.808000
-1899.505000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.0
0.0
0.0
0.0
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.0
0.0
0.0
0.0
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.00000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
0.500000
0.500000
1.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
-30.820000
-26.040000
-24.490000
-71.090000
-35.830000
-15.480000
-55.830000
-45.740000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
180.000000
0.000000
0.000000
0.000000
0.000000
25%
7.000606e+09
109.0
0.0
0.0
0.0
93.411500
86.980500
84.126000
62.685000
7.380000
6.660000
6.460000
5.330000
34.730000
32.190000
31.630000
27.130000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.660000
1.630000
1.600000
1.360000
9.880000
10.025000
9.810000
8.810000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
17.110000
17.480000
17.110000
15.560000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.0
0.0
0.0
0.0
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
44.740000
43.010000
38.580000
25.510000
2.990000
3.230000
3.280000
3.290000
17.290000
18.590000
18.930000
18.560000
0.000000
0.000000
0.000000
0.000000
30.390000
32.460000
32.740000
32.290000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.0
0.0
0.0
0.0
0.000000
0.000000
0.010000
0.000000
38.530000
41.190000
38.290000
32.370000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
3.000000
3.000000
3.000000
3.000000
109.000000
100.000000
90.000000
52.000000
30.000000
30.000000
30.000000
28.000000
0.000000
0.000000
0.000000
0.000000
1.000000
1.000000
1.000000
1.000000
25.000000
25.000000
25.000000
25.00000
1.000000
1.000000
1.000000
1.000000
0.000000
0.000000
0.000000
0.000000
82.000000
92.000000
87.000000
69.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
1.000000
1.000000
1.000000
467.000000
0.000000
0.000000
0.000000
0.000000
50%
7.001205e+09
109.0
0.0
0.0
0.0
197.704000
191.640000
192.080000
176.849000
34.310000
32.330000
32.360000
29.840000
96.310000
91.735000
92.140000
87.290000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
11.910000
11.610000
11.730000
11.260000
41.030000
40.430000
40.360000
39.120000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
65.110000
63.685000
63.730000
61.840000
0.000000
0.000000
0.000000
0.000000
3.950000
3.635000
3.310000
2.500000
0.000000
0.000000
0.000000
0.000000
0.0
0.0
0.0
0.0
11.640000
11.090000
10.410000
8.410000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
145.140000
141.530000
138.610000
125.460000
15.690000
15.740000
16.030000
15.660000
56.490000
57.080000
58.240000
56.610000
0.880000
0.930000
0.930000
0.960000
92.160000
92.550000
93.830000
91.640000
0.000000
0.000000
0.000000
0.000000
2.030000
2.040000
2.030000
1.740000
0.000000
0.000000
0.000000
0.000000
0.0
0.0
0.0
0.0
5.890000
5.960000
5.880000
5.380000
114.740000
116.340000
114.660000
105.890000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
6.000000
6.000000
5.000000
5.000000
230.000000
220.000000
225.000000
200.000000
110.000000
110.000000
98.000000
61.000000
30.000000
30.000000
30.000000
0.000000
1.000000
1.000000
1.000000
2.000000
145.000000
145.000000
145.000000
145.00000
1.000000
1.000000
1.000000
1.000000
0.000000
0.000000
0.000000
0.000000
154.000000
154.000000
154.000000
164.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.480000
0.420000
0.880000
2.605000
10.830000
8.810000
9.270000
14.800000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
1.000000
1.000000
1.000000
863.000000
0.000000
0.000000
0.000000
0.000000
75%
7.001812e+09
109.0
0.0
0.0
0.0
371.060000
365.344500
369.370500
353.466500
118.740000
115.595000
115.860000
112.130000
231.860000
226.815000
228.260000
220.505000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
40.960000
39.910000
40.110000
39.280000
110.390000
107.560000
109.090000
106.810000
2.080000
2.090000
2.040000
1.940000
0.000000
0.000000
0.000000
0.000000
168.270000
164.382500
166.110000
162.225000
30.807500
31.132500
30.580000
28.230000
53.290000
54.040000
52.490000
48.560000
0.000000
0.000000
0.000000
0.000000
0.0
0.0
0.0
0.0
144.837500
150.615000
147.940000
142.105000
0.000000
0.000000
0.000000
0.000000
2.430000
3.710000
3.990000
3.230000
0.000000
0.000000
0.000000
0.000000
372.860000
378.570000
369.900000
353.480000
46.840000
45.810000
46.290000
45.180000
132.387500
130.960000
133.930000
130.490000
8.140000
8.282500
8.110000
8.140000
208.075000
205.837500
207.280000
202.737500
4.060000
4.230000
4.080000
3.510000
15.030000
15.740000
15.360000
14.260000
0.000000
0.000000
0.000000
0.000000
0.0
0.0
0.0
0.0
26.930000
28.310000
27.710000
25.690000
251.670000
250.660000
248.990000
236.320000
0.000000
0.000000
0.000000
0.060000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
9.000000
10.000000
9.000000
9.000000
437.500000
428.000000
434.500000
415.000000
120.000000
128.000000
144.000000
144.000000
110.000000
110.000000
130.000000
50.000000
3.000000
3.000000
3.000000
3.000000
177.000000
177.000000
179.000000
179.00000
2.000000
2.000000
2.000000
2.000000
1.000000
1.000000
1.000000
1.000000
252.000000
252.000000
252.000000
252.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
122.070000
119.560000
122.070000
140.010000
122.070000
122.070000
122.070000
140.010000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
1.000000
1.000000
1.000000
1.000000
1807.500000
0.000000
0.000000
0.000000
0.000000
max
7.002411e+09
109.0
0.0
0.0
0.0
27731.088000
35145.834000
33543.624000
38805.617000
7376.710000
8157.780000
10752.560000
10427.460000
8362.360000
9667.130000
14007.340000
10310.760000
13724.380000
15371.040000
13095.360000
8464.030000
3775.110000
2812.040000
5337.040000
4428.460000
6431.330000
7400.660000
10752.560000
10389.240000
4729.740000
4557.140000
4961.330000
4429.880000
1466.030000
1196.430000
928.490000
927.410000
342.860000
916.240000
502.090000
339.840000
10643.380000
7674.780000
11039.910000
11099.260000
7366.580000
8133.660000
8014.430000
9382.580000
8314.760000
9284.740000
13950.040000
10223.430000
628.560000
544.630000
516.910000
808.490000
0.0
0.0
0.0
0.0
8432.990000
10936.730000
13980.060000
11495.310000
5900.660000
5490.280000
5681.540000
4244.530000
1023.210000
2372.510000
1390.880000
1635.710000
800.890000
370.130000
394.930000
787.790000
10674.030000
11365.310000
14043.060000
11517.730000
6626.930000
9324.660000
10696.230000
10598.830000
4693.860000
4455.830000
6274.190000
5463.780000
1872.340000
1983.010000
2433.060000
4318.280000
7454.630000
9669.910000
10830.160000
10796.290000
5459.560000
5800.930000
4309.290000
3819.830000
5647.160000
6141.880000
5645.860000
5689.760000
1351.110000
1136.080000
1394.890000
1431.960000
0.0
0.0
0.0
0.0
5712.110000
6745.760000
5957.140000
5956.660000
7716.140000
9699.010000
10830.380000
10796.590000
19.760000
21.330000
16.860000
62.380000
6789.410000
5289.540000
4127.010000
5057.740000
1362.940000
1495.940000
2327.510000
1005.230000
307.000000
138.000000
196.000000
131.000000
35190.000000
40335.000000
45320.000000
37235.000000
4010.000000
4010.000000
4449.000000
3399.000000
4010.000000
4010.000000
4449.000000
3399.000000
61.000000
54.000000
60.000000
84.000000
1555.000000
1555.000000
1555.000000
1555.00000
42.000000
48.000000
44.000000
40.000000
29.000000
35.000000
45.000000
49.000000
7546.000000
4365.000000
4076.000000
4061.000000
10285.900000
7873.550000
11117.610000
8993.950000
45735.400000
28144.120000
30036.060000
39221.270000
6362.280000
4980.900000
3716.900000
13884.310000
6433.760000
4809.360000
3483.170000
3467.170000
1.000000
1.000000
1.000000
1.000000
4.000000
5.000000
5.000000
4.000000
42.000000
48.000000
44.000000
40.000000
14.000000
16.000000
16.000000
11.000000
29.000000
35.000000
41.000000
49.000000
1.000000
1.000000
1.000000
1.000000
4337.000000
12916.220000
9165.600000
11166.210000
2618.570000
In [457]:
churn.describe(include=[np.object])
Out[457]:
last_date_of_month_6
last_date_of_month_7
last_date_of_month_8
last_date_of_month_9
date_of_last_rech_6
date_of_last_rech_7
date_of_last_rech_8
date_of_last_rech_9
date_of_last_rech_data_6
date_of_last_rech_data_7
date_of_last_rech_data_8
date_of_last_rech_data_9
count
99999
99398
98899
98340
98392
98232
96377
95239
25153
25571
26339
25922
unique
1
1
1
1
30
31
31
30
30
31
31
30
top
6/30/2014
7/31/2014
8/31/2014
9/30/2014
6/30/2014
7/31/2014
8/31/2014
9/29/2014
6/30/2014
7/31/2014
8/31/2014
9/29/2014
freq
99999
99398
98899
98340
16960
17288
14706
22623
1888
1813
1998
2329
In [458]:
ObjectVars = list(churn.columns[churn.dtypes == 'object'])
print(ObjectVars)
['last_date_of_month_6', 'last_date_of_month_7', 'last_date_of_month_8', 'last_date_of_month_9', 'date_of_last_rech_6', 'date_of_last_rech_7', 'date_of_last_rech_8', 'date_of_last_rech_9', 'date_of_last_rech_data_6', 'date_of_last_rech_data_7', 'date_of_last_rech_data_8', 'date_of_last_rech_data_9']
In [0]:
#Seperating the date, ID and numerical columns
churn_date=['last_date_of_month_6',
'last_date_of_month_7',
'last_date_of_month_8',
'last_date_of_month_9',
'date_of_last_rech_6',
'date_of_last_rech_7',
'date_of_last_rech_8',
'date_of_last_rech_9',
'date_of_last_rech_data_6',
'date_of_last_rech_data_7',
'date_of_last_rech_data_8',
'date_of_last_rech_data_9']
churn_id = ["mobile_number","circle_id" ]
churn_num = [col for col in churn.columns if col not in churn_date + churn_id]
In [460]:
churn_num
Out[460]:
['loc_og_t2o_mou',
'std_og_t2o_mou',
'loc_ic_t2o_mou',
'arpu_6',
'arpu_7',
'arpu_8',
'arpu_9',
'onnet_mou_6',
'onnet_mou_7',
'onnet_mou_8',
'onnet_mou_9',
'offnet_mou_6',
'offnet_mou_7',
'offnet_mou_8',
'offnet_mou_9',
'roam_ic_mou_6',
'roam_ic_mou_7',
'roam_ic_mou_8',
'roam_ic_mou_9',
'roam_og_mou_6',
'roam_og_mou_7',
'roam_og_mou_8',
'roam_og_mou_9',
'loc_og_t2t_mou_6',
'loc_og_t2t_mou_7',
'loc_og_t2t_mou_8',
'loc_og_t2t_mou_9',
'loc_og_t2m_mou_6',
'loc_og_t2m_mou_7',
'loc_og_t2m_mou_8',
'loc_og_t2m_mou_9',
'loc_og_t2f_mou_6',
'loc_og_t2f_mou_7',
'loc_og_t2f_mou_8',
'loc_og_t2f_mou_9',
'loc_og_t2c_mou_6',
'loc_og_t2c_mou_7',
'loc_og_t2c_mou_8',
'loc_og_t2c_mou_9',
'loc_og_mou_6',
'loc_og_mou_7',
'loc_og_mou_8',
'loc_og_mou_9',
'std_og_t2t_mou_6',
'std_og_t2t_mou_7',
'std_og_t2t_mou_8',
'std_og_t2t_mou_9',
'std_og_t2m_mou_6',
'std_og_t2m_mou_7',
'std_og_t2m_mou_8',
'std_og_t2m_mou_9',
'std_og_t2f_mou_6',
'std_og_t2f_mou_7',
'std_og_t2f_mou_8',
'std_og_t2f_mou_9',
'std_og_t2c_mou_6',
'std_og_t2c_mou_7',
'std_og_t2c_mou_8',
'std_og_t2c_mou_9',
'std_og_mou_6',
'std_og_mou_7',
'std_og_mou_8',
'std_og_mou_9',
'isd_og_mou_6',
'isd_og_mou_7',
'isd_og_mou_8',
'isd_og_mou_9',
'spl_og_mou_6',
'spl_og_mou_7',
'spl_og_mou_8',
'spl_og_mou_9',
'og_others_6',
'og_others_7',
'og_others_8',
'og_others_9',
'total_og_mou_6',
'total_og_mou_7',
'total_og_mou_8',
'total_og_mou_9',
'loc_ic_t2t_mou_6',
'loc_ic_t2t_mou_7',
'loc_ic_t2t_mou_8',
'loc_ic_t2t_mou_9',
'loc_ic_t2m_mou_6',
'loc_ic_t2m_mou_7',
'loc_ic_t2m_mou_8',
'loc_ic_t2m_mou_9',
'loc_ic_t2f_mou_6',
'loc_ic_t2f_mou_7',
'loc_ic_t2f_mou_8',
'loc_ic_t2f_mou_9',
'loc_ic_mou_6',
'loc_ic_mou_7',
'loc_ic_mou_8',
'loc_ic_mou_9',
'std_ic_t2t_mou_6',
'std_ic_t2t_mou_7',
'std_ic_t2t_mou_8',
'std_ic_t2t_mou_9',
'std_ic_t2m_mou_6',
'std_ic_t2m_mou_7',
'std_ic_t2m_mou_8',
'std_ic_t2m_mou_9',
'std_ic_t2f_mou_6',
'std_ic_t2f_mou_7',
'std_ic_t2f_mou_8',
'std_ic_t2f_mou_9',
'std_ic_t2o_mou_6',
'std_ic_t2o_mou_7',
'std_ic_t2o_mou_8',
'std_ic_t2o_mou_9',
'std_ic_mou_6',
'std_ic_mou_7',
'std_ic_mou_8',
'std_ic_mou_9',
'total_ic_mou_6',
'total_ic_mou_7',
'total_ic_mou_8',
'total_ic_mou_9',
'spl_ic_mou_6',
'spl_ic_mou_7',
'spl_ic_mou_8',
'spl_ic_mou_9',
'isd_ic_mou_6',
'isd_ic_mou_7',
'isd_ic_mou_8',
'isd_ic_mou_9',
'ic_others_6',
'ic_others_7',
'ic_others_8',
'ic_others_9',
'total_rech_num_6',
'total_rech_num_7',
'total_rech_num_8',
'total_rech_num_9',
'total_rech_amt_6',
'total_rech_amt_7',
'total_rech_amt_8',
'total_rech_amt_9',
'max_rech_amt_6',
'max_rech_amt_7',
'max_rech_amt_8',
'max_rech_amt_9',
'last_day_rch_amt_6',
'last_day_rch_amt_7',
'last_day_rch_amt_8',
'last_day_rch_amt_9',
'total_rech_data_6',
'total_rech_data_7',
'total_rech_data_8',
'total_rech_data_9',
'max_rech_data_6',
'max_rech_data_7',
'max_rech_data_8',
'max_rech_data_9',
'count_rech_2g_6',
'count_rech_2g_7',
'count_rech_2g_8',
'count_rech_2g_9',
'count_rech_3g_6',
'count_rech_3g_7',
'count_rech_3g_8',
'count_rech_3g_9',
'av_rech_amt_data_6',
'av_rech_amt_data_7',
'av_rech_amt_data_8',
'av_rech_amt_data_9',
'vol_2g_mb_6',
'vol_2g_mb_7',
'vol_2g_mb_8',
'vol_2g_mb_9',
'vol_3g_mb_6',
'vol_3g_mb_7',
'vol_3g_mb_8',
'vol_3g_mb_9',
'arpu_3g_6',
'arpu_3g_7',
'arpu_3g_8',
'arpu_3g_9',
'arpu_2g_6',
'arpu_2g_7',
'arpu_2g_8',
'arpu_2g_9',
'night_pck_user_6',
'night_pck_user_7',
'night_pck_user_8',
'night_pck_user_9',
'monthly_2g_6',
'monthly_2g_7',
'monthly_2g_8',
'monthly_2g_9',
'sachet_2g_6',
'sachet_2g_7',
'sachet_2g_8',
'sachet_2g_9',
'monthly_3g_6',
'monthly_3g_7',
'monthly_3g_8',
'monthly_3g_9',
'sachet_3g_6',
'sachet_3g_7',
'sachet_3g_8',
'sachet_3g_9',
'fb_user_6',
'fb_user_7',
'fb_user_8',
'fb_user_9',
'aon',
'aug_vbc_3g',
'jul_vbc_3g',
'jun_vbc_3g',
'sep_vbc_3g']
In [0]:
# when you look at the CSV file you will realise that the FB and night pack users are actually categorical columns
#coded 0 and 1 for yes and no, so lets seperate them as well
churn_cat = ['fb_user_6',
'fb_user_7',
'fb_user_8',
'fb_user_9','night_pck_user_6',
'night_pck_user_7',
'night_pck_user_8',
'night_pck_user_9']
#new num columns will be as following
churn_num = [col for col in churn_num if col not in churn_cat]
In [462]:
churn_num
Out[462]:
['loc_og_t2o_mou',
'std_og_t2o_mou',
'loc_ic_t2o_mou',
'arpu_6',
'arpu_7',
'arpu_8',
'arpu_9',
'onnet_mou_6',
'onnet_mou_7',
'onnet_mou_8',
'onnet_mou_9',
'offnet_mou_6',
'offnet_mou_7',
'offnet_mou_8',
'offnet_mou_9',
'roam_ic_mou_6',
'roam_ic_mou_7',
'roam_ic_mou_8',
'roam_ic_mou_9',
'roam_og_mou_6',
'roam_og_mou_7',
'roam_og_mou_8',
'roam_og_mou_9',
'loc_og_t2t_mou_6',
'loc_og_t2t_mou_7',
'loc_og_t2t_mou_8',
'loc_og_t2t_mou_9',
'loc_og_t2m_mou_6',
'loc_og_t2m_mou_7',
'loc_og_t2m_mou_8',
'loc_og_t2m_mou_9',
'loc_og_t2f_mou_6',
'loc_og_t2f_mou_7',
'loc_og_t2f_mou_8',
'loc_og_t2f_mou_9',
'loc_og_t2c_mou_6',
'loc_og_t2c_mou_7',
'loc_og_t2c_mou_8',
'loc_og_t2c_mou_9',
'loc_og_mou_6',
'loc_og_mou_7',
'loc_og_mou_8',
'loc_og_mou_9',
'std_og_t2t_mou_6',
'std_og_t2t_mou_7',
'std_og_t2t_mou_8',
'std_og_t2t_mou_9',
'std_og_t2m_mou_6',
'std_og_t2m_mou_7',
'std_og_t2m_mou_8',
'std_og_t2m_mou_9',
'std_og_t2f_mou_6',
'std_og_t2f_mou_7',
'std_og_t2f_mou_8',
'std_og_t2f_mou_9',
'std_og_t2c_mou_6',
'std_og_t2c_mou_7',
'std_og_t2c_mou_8',
'std_og_t2c_mou_9',
'std_og_mou_6',
'std_og_mou_7',
'std_og_mou_8',
'std_og_mou_9',
'isd_og_mou_6',
'isd_og_mou_7',
'isd_og_mou_8',
'isd_og_mou_9',
'spl_og_mou_6',
'spl_og_mou_7',
'spl_og_mou_8',
'spl_og_mou_9',
'og_others_6',
'og_others_7',
'og_others_8',
'og_others_9',
'total_og_mou_6',
'total_og_mou_7',
'total_og_mou_8',
'total_og_mou_9',
'loc_ic_t2t_mou_6',
'loc_ic_t2t_mou_7',
'loc_ic_t2t_mou_8',
'loc_ic_t2t_mou_9',
'loc_ic_t2m_mou_6',
'loc_ic_t2m_mou_7',
'loc_ic_t2m_mou_8',
'loc_ic_t2m_mou_9',
'loc_ic_t2f_mou_6',
'loc_ic_t2f_mou_7',
'loc_ic_t2f_mou_8',
'loc_ic_t2f_mou_9',
'loc_ic_mou_6',
'loc_ic_mou_7',
'loc_ic_mou_8',
'loc_ic_mou_9',
'std_ic_t2t_mou_6',
'std_ic_t2t_mou_7',
'std_ic_t2t_mou_8',
'std_ic_t2t_mou_9',
'std_ic_t2m_mou_6',
'std_ic_t2m_mou_7',
'std_ic_t2m_mou_8',
'std_ic_t2m_mou_9',
'std_ic_t2f_mou_6',
'std_ic_t2f_mou_7',
'std_ic_t2f_mou_8',
'std_ic_t2f_mou_9',
'std_ic_t2o_mou_6',
'std_ic_t2o_mou_7',
'std_ic_t2o_mou_8',
'std_ic_t2o_mou_9',
'std_ic_mou_6',
'std_ic_mou_7',
'std_ic_mou_8',
'std_ic_mou_9',
'total_ic_mou_6',
'total_ic_mou_7',
'total_ic_mou_8',
'total_ic_mou_9',
'spl_ic_mou_6',
'spl_ic_mou_7',
'spl_ic_mou_8',
'spl_ic_mou_9',
'isd_ic_mou_6',
'isd_ic_mou_7',
'isd_ic_mou_8',
'isd_ic_mou_9',
'ic_others_6',
'ic_others_7',
'ic_others_8',
'ic_others_9',
'total_rech_num_6',
'total_rech_num_7',
'total_rech_num_8',
'total_rech_num_9',
'total_rech_amt_6',
'total_rech_amt_7',
'total_rech_amt_8',
'total_rech_amt_9',
'max_rech_amt_6',
'max_rech_amt_7',
'max_rech_amt_8',
'max_rech_amt_9',
'last_day_rch_amt_6',
'last_day_rch_amt_7',
'last_day_rch_amt_8',
'last_day_rch_amt_9',
'total_rech_data_6',
'total_rech_data_7',
'total_rech_data_8',
'total_rech_data_9',
'max_rech_data_6',
'max_rech_data_7',
'max_rech_data_8',
'max_rech_data_9',
'count_rech_2g_6',
'count_rech_2g_7',
'count_rech_2g_8',
'count_rech_2g_9',
'count_rech_3g_6',
'count_rech_3g_7',
'count_rech_3g_8',
'count_rech_3g_9',
'av_rech_amt_data_6',
'av_rech_amt_data_7',
'av_rech_amt_data_8',
'av_rech_amt_data_9',
'vol_2g_mb_6',
'vol_2g_mb_7',
'vol_2g_mb_8',
'vol_2g_mb_9',
'vol_3g_mb_6',
'vol_3g_mb_7',
'vol_3g_mb_8',
'vol_3g_mb_9',
'arpu_3g_6',
'arpu_3g_7',
'arpu_3g_8',
'arpu_3g_9',
'arpu_2g_6',
'arpu_2g_7',
'arpu_2g_8',
'arpu_2g_9',
'monthly_2g_6',
'monthly_2g_7',
'monthly_2g_8',
'monthly_2g_9',
'sachet_2g_6',
'sachet_2g_7',
'sachet_2g_8',
'sachet_2g_9',
'monthly_3g_6',
'monthly_3g_7',
'monthly_3g_8',
'monthly_3g_9',
'sachet_3g_6',
'sachet_3g_7',
'sachet_3g_8',
'sachet_3g_9',
'aon',
'aug_vbc_3g',
'jul_vbc_3g',
'jun_vbc_3g',
'sep_vbc_3g']
In [463]:
round(churn.isnull().sum()/len(churn.index),4)*100
Out[463]:
mobile_number 0.00
circle_id 0.00
loc_og_t2o_mou 1.02
std_og_t2o_mou 1.02
loc_ic_t2o_mou 1.02
last_date_of_month_6 0.00
last_date_of_month_7 0.60
last_date_of_month_8 1.10
last_date_of_month_9 1.66
arpu_6 0.00
arpu_7 0.00
arpu_8 0.00
arpu_9 0.00
onnet_mou_6 3.94
onnet_mou_7 3.86
onnet_mou_8 5.38
onnet_mou_9 7.75
offnet_mou_6 3.94
offnet_mou_7 3.86
offnet_mou_8 5.38
offnet_mou_9 7.75
roam_ic_mou_6 3.94
roam_ic_mou_7 3.86
roam_ic_mou_8 5.38
roam_ic_mou_9 7.75
roam_og_mou_6 3.94
roam_og_mou_7 3.86
roam_og_mou_8 5.38
roam_og_mou_9 7.75
loc_og_t2t_mou_6 3.94
loc_og_t2t_mou_7 3.86
loc_og_t2t_mou_8 5.38
loc_og_t2t_mou_9 7.75
loc_og_t2m_mou_6 3.94
loc_og_t2m_mou_7 3.86
loc_og_t2m_mou_8 5.38
loc_og_t2m_mou_9 7.75
loc_og_t2f_mou_6 3.94
loc_og_t2f_mou_7 3.86
loc_og_t2f_mou_8 5.38
loc_og_t2f_mou_9 7.75
loc_og_t2c_mou_6 3.94
loc_og_t2c_mou_7 3.86
loc_og_t2c_mou_8 5.38
loc_og_t2c_mou_9 7.75
loc_og_mou_6 3.94
loc_og_mou_7 3.86
loc_og_mou_8 5.38
loc_og_mou_9 7.75
std_og_t2t_mou_6 3.94
std_og_t2t_mou_7 3.86
std_og_t2t_mou_8 5.38
std_og_t2t_mou_9 7.75
std_og_t2m_mou_6 3.94
std_og_t2m_mou_7 3.86
std_og_t2m_mou_8 5.38
std_og_t2m_mou_9 7.75
std_og_t2f_mou_6 3.94
std_og_t2f_mou_7 3.86
std_og_t2f_mou_8 5.38
std_og_t2f_mou_9 7.75
std_og_t2c_mou_6 3.94
std_og_t2c_mou_7 3.86
std_og_t2c_mou_8 5.38
std_og_t2c_mou_9 7.75
std_og_mou_6 3.94
std_og_mou_7 3.86
std_og_mou_8 5.38
std_og_mou_9 7.75
isd_og_mou_6 3.94
isd_og_mou_7 3.86
isd_og_mou_8 5.38
isd_og_mou_9 7.75
spl_og_mou_6 3.94
spl_og_mou_7 3.86
spl_og_mou_8 5.38
spl_og_mou_9 7.75
og_others_6 3.94
og_others_7 3.86
og_others_8 5.38
og_others_9 7.75
total_og_mou_6 0.00
total_og_mou_7 0.00
total_og_mou_8 0.00
total_og_mou_9 0.00
loc_ic_t2t_mou_6 3.94
loc_ic_t2t_mou_7 3.86
loc_ic_t2t_mou_8 5.38
loc_ic_t2t_mou_9 7.75
loc_ic_t2m_mou_6 3.94
loc_ic_t2m_mou_7 3.86
loc_ic_t2m_mou_8 5.38
loc_ic_t2m_mou_9 7.75
loc_ic_t2f_mou_6 3.94
loc_ic_t2f_mou_7 3.86
loc_ic_t2f_mou_8 5.38
loc_ic_t2f_mou_9 7.75
loc_ic_mou_6 3.94
loc_ic_mou_7 3.86
loc_ic_mou_8 5.38
loc_ic_mou_9 7.75
std_ic_t2t_mou_6 3.94
std_ic_t2t_mou_7 3.86
std_ic_t2t_mou_8 5.38
std_ic_t2t_mou_9 7.75
std_ic_t2m_mou_6 3.94
std_ic_t2m_mou_7 3.86
std_ic_t2m_mou_8 5.38
std_ic_t2m_mou_9 7.75
std_ic_t2f_mou_6 3.94
std_ic_t2f_mou_7 3.86
std_ic_t2f_mou_8 5.38
std_ic_t2f_mou_9 7.75
std_ic_t2o_mou_6 3.94
std_ic_t2o_mou_7 3.86
std_ic_t2o_mou_8 5.38
std_ic_t2o_mou_9 7.75
std_ic_mou_6 3.94
std_ic_mou_7 3.86
std_ic_mou_8 5.38
std_ic_mou_9 7.75
total_ic_mou_6 0.00
total_ic_mou_7 0.00
total_ic_mou_8 0.00
total_ic_mou_9 0.00
spl_ic_mou_6 3.94
spl_ic_mou_7 3.86
spl_ic_mou_8 5.38
spl_ic_mou_9 7.75
isd_ic_mou_6 3.94
isd_ic_mou_7 3.86
isd_ic_mou_8 5.38
isd_ic_mou_9 7.75
ic_others_6 3.94
ic_others_7 3.86
ic_others_8 5.38
ic_others_9 7.75
total_rech_num_6 0.00
total_rech_num_7 0.00
total_rech_num_8 0.00
total_rech_num_9 0.00
total_rech_amt_6 0.00
total_rech_amt_7 0.00
total_rech_amt_8 0.00
total_rech_amt_9 0.00
max_rech_amt_6 0.00
max_rech_amt_7 0.00
max_rech_amt_8 0.00
max_rech_amt_9 0.00
date_of_last_rech_6 1.61
date_of_last_rech_7 1.77
date_of_last_rech_8 3.62
date_of_last_rech_9 4.76
last_day_rch_amt_6 0.00
last_day_rch_amt_7 0.00
last_day_rch_amt_8 0.00
last_day_rch_amt_9 0.00
date_of_last_rech_data_6 74.85
date_of_last_rech_data_7 74.43
date_of_last_rech_data_8 73.66
date_of_last_rech_data_9 74.08
total_rech_data_6 74.85
total_rech_data_7 74.43
total_rech_data_8 73.66
total_rech_data_9 74.08
max_rech_data_6 74.85
max_rech_data_7 74.43
max_rech_data_8 73.66
max_rech_data_9 74.08
count_rech_2g_6 74.85
count_rech_2g_7 74.43
count_rech_2g_8 73.66
count_rech_2g_9 74.08
count_rech_3g_6 74.85
count_rech_3g_7 74.43
count_rech_3g_8 73.66
count_rech_3g_9 74.08
av_rech_amt_data_6 74.85
av_rech_amt_data_7 74.43
av_rech_amt_data_8 73.66
av_rech_amt_data_9 74.08
vol_2g_mb_6 0.00
vol_2g_mb_7 0.00
vol_2g_mb_8 0.00
vol_2g_mb_9 0.00
vol_3g_mb_6 0.00
vol_3g_mb_7 0.00
vol_3g_mb_8 0.00
vol_3g_mb_9 0.00
arpu_3g_6 74.85
arpu_3g_7 74.43
arpu_3g_8 73.66
arpu_3g_9 74.08
arpu_2g_6 74.85
arpu_2g_7 74.43
arpu_2g_8 73.66
arpu_2g_9 74.08
night_pck_user_6 74.85
night_pck_user_7 74.43
night_pck_user_8 73.66
night_pck_user_9 74.08
monthly_2g_6 0.00
monthly_2g_7 0.00
monthly_2g_8 0.00
monthly_2g_9 0.00
sachet_2g_6 0.00
sachet_2g_7 0.00
sachet_2g_8 0.00
sachet_2g_9 0.00
monthly_3g_6 0.00
monthly_3g_7 0.00
monthly_3g_8 0.00
monthly_3g_9 0.00
sachet_3g_6 0.00
sachet_3g_7 0.00
sachet_3g_8 0.00
sachet_3g_9 0.00
fb_user_6 74.85
fb_user_7 74.43
fb_user_8 73.66
fb_user_9 74.08
aon 0.00
aug_vbc_3g 0.00
jul_vbc_3g 0.00
jun_vbc_3g 0.00
sep_vbc_3g 0.00
dtype: float64
In [0]:
# categorical columns
churn[churn_cat] = churn[churn_cat].apply(lambda x: x.fillna(-1))
In [465]:
round(churn.isnull().sum()/len(churn.index),4)*100
Out[465]:
mobile_number 0.00
circle_id 0.00
loc_og_t2o_mou 1.02
std_og_t2o_mou 1.02
loc_ic_t2o_mou 1.02
last_date_of_month_6 0.00
last_date_of_month_7 0.60
last_date_of_month_8 1.10
last_date_of_month_9 1.66
arpu_6 0.00
arpu_7 0.00
arpu_8 0.00
arpu_9 0.00
onnet_mou_6 3.94
onnet_mou_7 3.86
onnet_mou_8 5.38
onnet_mou_9 7.75
offnet_mou_6 3.94
offnet_mou_7 3.86
offnet_mou_8 5.38
offnet_mou_9 7.75
roam_ic_mou_6 3.94
roam_ic_mou_7 3.86
roam_ic_mou_8 5.38
roam_ic_mou_9 7.75
roam_og_mou_6 3.94
roam_og_mou_7 3.86
roam_og_mou_8 5.38
roam_og_mou_9 7.75
loc_og_t2t_mou_6 3.94
loc_og_t2t_mou_7 3.86
loc_og_t2t_mou_8 5.38
loc_og_t2t_mou_9 7.75
loc_og_t2m_mou_6 3.94
loc_og_t2m_mou_7 3.86
loc_og_t2m_mou_8 5.38
loc_og_t2m_mou_9 7.75
loc_og_t2f_mou_6 3.94
loc_og_t2f_mou_7 3.86
loc_og_t2f_mou_8 5.38
loc_og_t2f_mou_9 7.75
loc_og_t2c_mou_6 3.94
loc_og_t2c_mou_7 3.86
loc_og_t2c_mou_8 5.38
loc_og_t2c_mou_9 7.75
loc_og_mou_6 3.94
loc_og_mou_7 3.86
loc_og_mou_8 5.38
loc_og_mou_9 7.75
std_og_t2t_mou_6 3.94
std_og_t2t_mou_7 3.86
std_og_t2t_mou_8 5.38
std_og_t2t_mou_9 7.75
std_og_t2m_mou_6 3.94
std_og_t2m_mou_7 3.86
std_og_t2m_mou_8 5.38
std_og_t2m_mou_9 7.75
std_og_t2f_mou_6 3.94
std_og_t2f_mou_7 3.86
std_og_t2f_mou_8 5.38
std_og_t2f_mou_9 7.75
std_og_t2c_mou_6 3.94
std_og_t2c_mou_7 3.86
std_og_t2c_mou_8 5.38
std_og_t2c_mou_9 7.75
std_og_mou_6 3.94
std_og_mou_7 3.86
std_og_mou_8 5.38
std_og_mou_9 7.75
isd_og_mou_6 3.94
isd_og_mou_7 3.86
isd_og_mou_8 5.38
isd_og_mou_9 7.75
spl_og_mou_6 3.94
spl_og_mou_7 3.86
spl_og_mou_8 5.38
spl_og_mou_9 7.75
og_others_6 3.94
og_others_7 3.86
og_others_8 5.38
og_others_9 7.75
total_og_mou_6 0.00
total_og_mou_7 0.00
total_og_mou_8 0.00
total_og_mou_9 0.00
loc_ic_t2t_mou_6 3.94
loc_ic_t2t_mou_7 3.86
loc_ic_t2t_mou_8 5.38
loc_ic_t2t_mou_9 7.75
loc_ic_t2m_mou_6 3.94
loc_ic_t2m_mou_7 3.86
loc_ic_t2m_mou_8 5.38
loc_ic_t2m_mou_9 7.75
loc_ic_t2f_mou_6 3.94
loc_ic_t2f_mou_7 3.86
loc_ic_t2f_mou_8 5.38
loc_ic_t2f_mou_9 7.75
loc_ic_mou_6 3.94
loc_ic_mou_7 3.86
loc_ic_mou_8 5.38
loc_ic_mou_9 7.75
std_ic_t2t_mou_6 3.94
std_ic_t2t_mou_7 3.86
std_ic_t2t_mou_8 5.38
std_ic_t2t_mou_9 7.75
std_ic_t2m_mou_6 3.94
std_ic_t2m_mou_7 3.86
std_ic_t2m_mou_8 5.38
std_ic_t2m_mou_9 7.75
std_ic_t2f_mou_6 3.94
std_ic_t2f_mou_7 3.86
std_ic_t2f_mou_8 5.38
std_ic_t2f_mou_9 7.75
std_ic_t2o_mou_6 3.94
std_ic_t2o_mou_7 3.86
std_ic_t2o_mou_8 5.38
std_ic_t2o_mou_9 7.75
std_ic_mou_6 3.94
std_ic_mou_7 3.86
std_ic_mou_8 5.38
std_ic_mou_9 7.75
total_ic_mou_6 0.00
total_ic_mou_7 0.00
total_ic_mou_8 0.00
total_ic_mou_9 0.00
spl_ic_mou_6 3.94
spl_ic_mou_7 3.86
spl_ic_mou_8 5.38
spl_ic_mou_9 7.75
isd_ic_mou_6 3.94
isd_ic_mou_7 3.86
isd_ic_mou_8 5.38
isd_ic_mou_9 7.75
ic_others_6 3.94
ic_others_7 3.86
ic_others_8 5.38
ic_others_9 7.75
total_rech_num_6 0.00
total_rech_num_7 0.00
total_rech_num_8 0.00
total_rech_num_9 0.00
total_rech_amt_6 0.00
total_rech_amt_7 0.00
total_rech_amt_8 0.00
total_rech_amt_9 0.00
max_rech_amt_6 0.00
max_rech_amt_7 0.00
max_rech_amt_8 0.00
max_rech_amt_9 0.00
date_of_last_rech_6 1.61
date_of_last_rech_7 1.77
date_of_last_rech_8 3.62
date_of_last_rech_9 4.76
last_day_rch_amt_6 0.00
last_day_rch_amt_7 0.00
last_day_rch_amt_8 0.00
last_day_rch_amt_9 0.00
date_of_last_rech_data_6 74.85
date_of_last_rech_data_7 74.43
date_of_last_rech_data_8 73.66
date_of_last_rech_data_9 74.08
total_rech_data_6 74.85
total_rech_data_7 74.43
total_rech_data_8 73.66
total_rech_data_9 74.08
max_rech_data_6 74.85
max_rech_data_7 74.43
max_rech_data_8 73.66
max_rech_data_9 74.08
count_rech_2g_6 74.85
count_rech_2g_7 74.43
count_rech_2g_8 73.66
count_rech_2g_9 74.08
count_rech_3g_6 74.85
count_rech_3g_7 74.43
count_rech_3g_8 73.66
count_rech_3g_9 74.08
av_rech_amt_data_6 74.85
av_rech_amt_data_7 74.43
av_rech_amt_data_8 73.66
av_rech_amt_data_9 74.08
vol_2g_mb_6 0.00
vol_2g_mb_7 0.00
vol_2g_mb_8 0.00
vol_2g_mb_9 0.00
vol_3g_mb_6 0.00
vol_3g_mb_7 0.00
vol_3g_mb_8 0.00
vol_3g_mb_9 0.00
arpu_3g_6 74.85
arpu_3g_7 74.43
arpu_3g_8 73.66
arpu_3g_9 74.08
arpu_2g_6 74.85
arpu_2g_7 74.43
arpu_2g_8 73.66
arpu_2g_9 74.08
night_pck_user_6 0.00
night_pck_user_7 0.00
night_pck_user_8 0.00
night_pck_user_9 0.00
monthly_2g_6 0.00
monthly_2g_7 0.00
monthly_2g_8 0.00
monthly_2g_9 0.00
sachet_2g_6 0.00
sachet_2g_7 0.00
sachet_2g_8 0.00
sachet_2g_9 0.00
monthly_3g_6 0.00
monthly_3g_7 0.00
monthly_3g_8 0.00
monthly_3g_9 0.00
sachet_3g_6 0.00
sachet_3g_7 0.00
sachet_3g_8 0.00
sachet_3g_9 0.00
fb_user_6 0.00
fb_user_7 0.00
fb_user_8 0.00
fb_user_9 0.00
aon 0.00
aug_vbc_3g 0.00
jul_vbc_3g 0.00
jun_vbc_3g 0.00
sep_vbc_3g 0.00
dtype: float64
In [466]:
churn.head(5)
Out[466]:
mobile_number
circle_id
loc_og_t2o_mou
std_og_t2o_mou
loc_ic_t2o_mou
last_date_of_month_6
last_date_of_month_7
last_date_of_month_8
last_date_of_month_9
arpu_6
arpu_7
arpu_8
arpu_9
onnet_mou_6
onnet_mou_7
onnet_mou_8
onnet_mou_9
offnet_mou_6
offnet_mou_7
offnet_mou_8
offnet_mou_9
roam_ic_mou_6
roam_ic_mou_7
roam_ic_mou_8
roam_ic_mou_9
roam_og_mou_6
roam_og_mou_7
roam_og_mou_8
roam_og_mou_9
loc_og_t2t_mou_6
loc_og_t2t_mou_7
loc_og_t2t_mou_8
loc_og_t2t_mou_9
loc_og_t2m_mou_6
loc_og_t2m_mou_7
loc_og_t2m_mou_8
loc_og_t2m_mou_9
loc_og_t2f_mou_6
loc_og_t2f_mou_7
loc_og_t2f_mou_8
loc_og_t2f_mou_9
loc_og_t2c_mou_6
loc_og_t2c_mou_7
loc_og_t2c_mou_8
loc_og_t2c_mou_9
loc_og_mou_6
loc_og_mou_7
loc_og_mou_8
loc_og_mou_9
std_og_t2t_mou_6
std_og_t2t_mou_7
std_og_t2t_mou_8
std_og_t2t_mou_9
std_og_t2m_mou_6
std_og_t2m_mou_7
std_og_t2m_mou_8
std_og_t2m_mou_9
std_og_t2f_mou_6
std_og_t2f_mou_7
std_og_t2f_mou_8
std_og_t2f_mou_9
std_og_t2c_mou_6
std_og_t2c_mou_7
std_og_t2c_mou_8
std_og_t2c_mou_9
std_og_mou_6
std_og_mou_7
std_og_mou_8
std_og_mou_9
isd_og_mou_6
isd_og_mou_7
isd_og_mou_8
isd_og_mou_9
spl_og_mou_6
spl_og_mou_7
spl_og_mou_8
spl_og_mou_9
og_others_6
og_others_7
og_others_8
og_others_9
total_og_mou_6
total_og_mou_7
total_og_mou_8
total_og_mou_9
loc_ic_t2t_mou_6
loc_ic_t2t_mou_7
loc_ic_t2t_mou_8
loc_ic_t2t_mou_9
loc_ic_t2m_mou_6
loc_ic_t2m_mou_7
loc_ic_t2m_mou_8
loc_ic_t2m_mou_9
loc_ic_t2f_mou_6
loc_ic_t2f_mou_7
loc_ic_t2f_mou_8
loc_ic_t2f_mou_9
loc_ic_mou_6
loc_ic_mou_7
loc_ic_mou_8
loc_ic_mou_9
std_ic_t2t_mou_6
std_ic_t2t_mou_7
std_ic_t2t_mou_8
std_ic_t2t_mou_9
std_ic_t2m_mou_6
std_ic_t2m_mou_7
std_ic_t2m_mou_8
std_ic_t2m_mou_9
std_ic_t2f_mou_6
std_ic_t2f_mou_7
std_ic_t2f_mou_8
std_ic_t2f_mou_9
std_ic_t2o_mou_6
std_ic_t2o_mou_7
std_ic_t2o_mou_8
std_ic_t2o_mou_9
std_ic_mou_6
std_ic_mou_7
std_ic_mou_8
std_ic_mou_9
total_ic_mou_6
total_ic_mou_7
total_ic_mou_8
total_ic_mou_9
spl_ic_mou_6
spl_ic_mou_7
spl_ic_mou_8
spl_ic_mou_9
isd_ic_mou_6
isd_ic_mou_7
isd_ic_mou_8
isd_ic_mou_9
ic_others_6
ic_others_7
ic_others_8
ic_others_9
total_rech_num_6
total_rech_num_7
total_rech_num_8
total_rech_num_9
total_rech_amt_6
total_rech_amt_7
total_rech_amt_8
total_rech_amt_9
max_rech_amt_6
max_rech_amt_7
max_rech_amt_8
max_rech_amt_9
date_of_last_rech_6
date_of_last_rech_7
date_of_last_rech_8
date_of_last_rech_9
last_day_rch_amt_6
last_day_rch_amt_7
last_day_rch_amt_8
last_day_rch_amt_9
date_of_last_rech_data_6
date_of_last_rech_data_7
date_of_last_rech_data_8
date_of_last_rech_data_9
total_rech_data_6
total_rech_data_7
total_rech_data_8
total_rech_data_9
max_rech_data_6
max_rech_data_7
max_rech_data_8
max_rech_data_9
count_rech_2g_6
count_rech_2g_7
count_rech_2g_8
count_rech_2g_9
count_rech_3g_6
count_rech_3g_7
count_rech_3g_8
count_rech_3g_9
av_rech_amt_data_6
av_rech_amt_data_7
av_rech_amt_data_8
av_rech_amt_data_9
vol_2g_mb_6
vol_2g_mb_7
vol_2g_mb_8
vol_2g_mb_9
vol_3g_mb_6
vol_3g_mb_7
vol_3g_mb_8
vol_3g_mb_9
arpu_3g_6
arpu_3g_7
arpu_3g_8
arpu_3g_9
arpu_2g_6
arpu_2g_7
arpu_2g_8
arpu_2g_9
night_pck_user_6
night_pck_user_7
night_pck_user_8
night_pck_user_9
monthly_2g_6
monthly_2g_7
monthly_2g_8
monthly_2g_9
sachet_2g_6
sachet_2g_7
sachet_2g_8
sachet_2g_9
monthly_3g_6
monthly_3g_7
monthly_3g_8
monthly_3g_9
sachet_3g_6
sachet_3g_7
sachet_3g_8
sachet_3g_9
fb_user_6
fb_user_7
fb_user_8
fb_user_9
aon
aug_vbc_3g
jul_vbc_3g
jun_vbc_3g
sep_vbc_3g
0
7000842753
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
197.385
214.816
213.803
21.100
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.0
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.0
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.0
NaN
0.00
0.00
0.00
0.00
NaN
NaN
0.16
NaN
NaN
NaN
4.13
NaN
NaN
NaN
1.15
NaN
NaN
NaN
5.44
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.0
NaN
NaN
NaN
0.00
NaN
0.00
0.00
5.44
0.00
NaN
NaN
0.0
NaN
NaN
NaN
0.0
NaN
NaN
NaN
0.0
NaN
4
3
2
6
362
252
252
0
252
252
252
0
6/21/2014
7/16/2014
8/8/2014
9/28/2014
252
252
252
0
6/21/2014
7/16/2014
8/8/2014
NaN
1.0
1.0
1.0
NaN
252.0
252.0
252.0
NaN
0.0
0.0
0.0
NaN
1.0
1.0
1.0
NaN
252.0
252.0
252.0
NaN
30.13
1.32
5.75
0.0
83.57
150.76
109.61
0.00
212.17
212.17
212.17
NaN
212.17
212.17
212.17
NaN
0.0
0.0
0.0
-1.0
0
0
0
0
0
0
0
0
1
1
1
0
0
0
0
0
1.0
1.0
1.0
-1.0
968
30.4
0.0
101.20
3.58
1
7001865778
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
34.047
355.074
268.321
86.285
24.11
78.68
7.68
18.34
15.74
99.84
304.76
53.76
0.0
0.00
0.00
0.00
0.0
0.00
0.00
0.00
23.88
74.56
7.68
18.34
11.51
75.94
291.86
53.76
0.00
0.00
0.00
0.00
0.0
2.91
0.00
0.00
35.39
150.51
299.54
72.11
0.23
4.11
0.00
0.00
0.00
0.46
0.13
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
0.0
0.23
4.58
0.13
0.00
0.0
0.0
0.0
0.0
4.68
23.43
12.76
0.00
0.00
0.0
0.0
0.0
40.31
178.53
312.44
72.11
1.61
29.91
29.23
116.09
17.48
65.38
375.58
56.93
0.00
8.93
3.61
0.00
19.09
104.23
408.43
173.03
0.00
0.00
2.35
0.00
5.90
0.00
12.49
15.01
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
5.90
0.00
14.84
15.01
26.83
104.23
423.28
188.04
0.00
0.0
0.0
0.00
1.83
0.00
0.0
0.00
0.00
0.00
0.0
0.00
4
9
11
5
74
384
283
121
44
154
65
50
6/29/2014
7/31/2014
8/28/2014
9/30/2014
44
23
30
0
NaN
7/25/2014
8/10/2014
NaN
NaN
1.0
2.0
NaN
NaN
154.0
25.0
NaN
NaN
1.0
2.0
NaN
NaN
0.0
0.0
NaN
NaN
154.0
50.0
NaN
0.00
108.07
365.47
0.0
0.00
0.00
0.00
0.00
NaN
0.00
0.00
NaN
NaN
28.61
7.60
NaN
-1.0
0.0
0.0
-1.0
0
1
0
0
0
0
2
0
0
0
0
0
0
0
0
0
-1.0
1.0
1.0
-1.0
1006
0.0
0.0
0.00
0.00
2
7001625959
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
167.690
189.058
210.226
290.714
11.54
55.24
37.26
74.81
143.33
220.59
208.36
118.91
0.0
0.00
0.00
38.49
0.0
0.00
0.00
70.94
7.19
28.74
13.58
14.39
29.34
16.86
38.46
28.16
24.11
21.79
15.61
22.24
0.0
135.54
45.76
0.48
60.66
67.41
67.66
64.81
4.34
26.49
22.58
8.76
41.81
67.41
75.53
9.28
1.48
14.76
22.83
0.0
0.0
0.0
0.0
0.0
47.64
108.68
120.94
18.04
0.0
0.0
0.0
0.0
46.56
236.84
96.84
42.08
0.45
0.0
0.0
0.0
155.33
412.94
285.46
124.94
115.69
71.11
67.46
148.23
14.38
15.44
38.89
38.98
99.48
122.29
49.63
158.19
229.56
208.86
155.99
345.41
72.41
71.29
28.69
49.44
45.18
177.01
167.09
118.18
21.73
58.34
43.23
3.86
0.0
0.0
0.0
0.0
139.33
306.66
239.03
171.49
370.04
519.53
395.03
517.74
0.21
0.0
0.0
0.45
0.00
0.85
0.0
0.01
0.93
3.14
0.0
0.36
5
4
2
7
168
315
116
358
86
200
86
100
6/17/2014
7/24/2014
8/14/2014
9/29/2014
0
200
86
0
NaN
NaN
NaN
9/17/2014
NaN
NaN
NaN
1.0
NaN
NaN
NaN
46.0
NaN
NaN
NaN
1.0
NaN
NaN
NaN
0.0
NaN
NaN
NaN
46.0
0.00
0.00
0.00
0.0
0.00
0.00
0.00
8.42
NaN
NaN
NaN
2.84
NaN
NaN
NaN
0.0
-1.0
-1.0
-1.0
0.0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
-1.0
-1.0
-1.0
1.0
1103
0.0
0.0
4.17
0.00
3
7001204172
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
221.338
251.102
508.054
389.500
99.91
54.39
310.98
241.71
123.31
109.01
71.68
113.54
0.0
54.86
44.38
0.00
0.0
28.09
39.04
0.00
73.68
34.81
10.61
15.49
107.43
83.21
22.46
65.46
1.91
0.65
4.91
2.06
0.0
0.00
0.00
0.00
183.03
118.68
37.99
83.03
26.23
14.89
289.58
226.21
2.99
1.73
6.53
9.99
0.00
0.00
0.00
0.0
0.0
0.0
0.0
0.0
29.23
16.63
296.11
236.21
0.0
0.0
0.0
0.0
10.96
0.00
18.09
43.29
0.00
0.0
0.0
0.0
223.23
135.31
352.21
362.54
62.08
19.98
8.04
41.73
113.96
64.51
20.28
52.86
57.43
27.09
19.84
65.59
233.48
111.59
48.18
160.19
43.48
66.44
0.00
129.84
1.33
38.56
4.94
13.98
1.18
0.00
0.00
0.00
0.0
0.0
0.0
0.0
45.99
105.01
4.94
143.83
280.08
216.61
53.13
305.38
0.59
0.0
0.0
0.55
0.00
0.00
0.0
0.00
0.00
0.00
0.0
0.80
10
11
18
14
230
310
601
410
60
50
50
50
6/28/2014
7/31/2014
8/31/2014
9/30/2014
30
50
50
30
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0.00
0.00
0.00
0.0
0.00
0.00
0.00
0.00
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
-1.0
-1.0
-1.0
-1.0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
-1.0
-1.0
-1.0
-1.0
2491
0.0
0.0
0.00
0.00
4
7000142493
109
0.0
0.0
0.0
6/30/2014
7/31/2014
8/31/2014
9/30/2014
261.636
309.876
238.174
163.426
50.31
149.44
83.89
58.78
76.96
91.88
124.26
45.81
0.0
0.00
0.00
0.00
0.0
0.00
0.00
0.00
50.31
149.44
83.89
58.78
67.64
91.88
124.26
37.89
0.00
0.00
0.00
1.93
0.0
0.00
0.00
0.00
117.96
241.33
208.16
98.61
0.00
0.00
0.00
0.00
9.31
0.00
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
0.0
9.31
0.00
0.00
0.00
0.0
0.0
0.0
0.0
0.00
0.00
0.00
5.98
0.00
0.0
0.0
0.0
127.28
241.33
208.16
104.59
105.68
88.49
233.81
154.56
106.84
109.54
104.13
48.24
1.50
0.00
0.00
0.00
214.03
198.04
337.94
202.81
0.00
0.00
0.86
2.31
1.93
0.25
0.00
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.0
1.93
0.25
0.86
2.31
216.44
198.29
338.81
205.31
0.00
0.0
0.0
0.18
0.00
0.00
0.0
0.00
0.48
0.00
0.0
0.00
5
6
3
4
196
350
287
200
56
110
110
50
6/26/2014
7/28/2014
8/9/2014
9/28/2014
50
110
110
50
6/4/2014
NaN
NaN
NaN
1.0
NaN
NaN
NaN
56.0
NaN
NaN
NaN
1.0
NaN
NaN
NaN
0.0
NaN
NaN
NaN
56.0
NaN
NaN
NaN
0.00
0.00
0.00
0.0
0.00
0.00
0.00
0.00
0.00
NaN
NaN
NaN
0.00
NaN
NaN
NaN
0.0
-1.0
-1.0
-1.0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0.0
-1.0
-1.0
-1.0
1526
0.0
0.0
0.00
0.00
In [0]:
#we can impute the recharge columns such as total, avg and max with 0 as null valiue idicates no recharge
i = ["total_rech_data_6"
,"total_rech_data_7"
,"total_rech_data_8"
,"total_rech_data_9"
,"av_rech_amt_data_6"
,"av_rech_amt_data_7"
,"av_rech_amt_data_8"
,"av_rech_amt_data_9"
,"max_rech_data_6"
,"max_rech_data_7"
,"max_rech_data_8"
,"max_rech_data_9"]
churn[i]=churn[i].apply(lambda x: x.fillna(0))
In [468]:
churn[i].isna
Out[468]:
<bound method DataFrame.isna of total_rech_data_6 total_rech_data_7 total_rech_data_8 total_rech_data_9 av_rech_amt_data_6 av_rech_amt_data_7 av_rech_amt_data_8 av_rech_amt_data_9 max_rech_data_6 max_rech_data_7 max_rech_data_8 max_rech_data_9
0 1.0 1.0 1.0 0.0 252.0 252.0 252.0 0.0 252.0 252.0 252.0 0.0
1 0.0 1.0 2.0 0.0 0.0 154.0 50.0 0.0 0.0 154.0 25.0 0.0
2 0.0 0.0 0.0 1.0 0.0 0.0 0.0 46.0 0.0 0.0 0.0 46.0
3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 1.0 0.0 0.0 0.0 56.0 0.0 0.0 0.0 56.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ...
99994 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
99995 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
99996 2.0 0.0 0.0 0.0 39.0 0.0 0.0 0.0 25.0 0.0 0.0 0.0
99997 3.0 2.0 4.0 4.0 583.0 358.0 716.0 862.0 202.0 179.0 179.0 252.0
99998 1.0 0.0 0.0 0.0 154.0 0.0 0.0 0.0 154.0 0.0 0.0 0.0
[99999 rows x 12 columns]>
In [469]:
round(churn.isnull().sum()/len(churn.index),4)*100 > 70
Out[469]:
mobile_number False
circle_id False
loc_og_t2o_mou False
std_og_t2o_mou False
loc_ic_t2o_mou False
last_date_of_month_6 False
last_date_of_month_7 False
last_date_of_month_8 False
last_date_of_month_9 False
arpu_6 False
arpu_7 False
arpu_8 False
arpu_9 False
onnet_mou_6 False
onnet_mou_7 False
onnet_mou_8 False
onnet_mou_9 False
offnet_mou_6 False
offnet_mou_7 False
offnet_mou_8 False
offnet_mou_9 False
roam_ic_mou_6 False
roam_ic_mou_7 False
roam_ic_mou_8 False
roam_ic_mou_9 False
roam_og_mou_6 False
roam_og_mou_7 False
roam_og_mou_8 False
roam_og_mou_9 False
loc_og_t2t_mou_6 False
loc_og_t2t_mou_7 False
loc_og_t2t_mou_8 False
loc_og_t2t_mou_9 False
loc_og_t2m_mou_6 False
loc_og_t2m_mou_7 False
loc_og_t2m_mou_8 False
loc_og_t2m_mou_9 False
loc_og_t2f_mou_6 False
loc_og_t2f_mou_7 False
loc_og_t2f_mou_8 False
loc_og_t2f_mou_9 False
loc_og_t2c_mou_6 False
loc_og_t2c_mou_7 False
loc_og_t2c_mou_8 False
loc_og_t2c_mou_9 False
loc_og_mou_6 False
loc_og_mou_7 False
loc_og_mou_8 False
loc_og_mou_9 False
std_og_t2t_mou_6 False
std_og_t2t_mou_7 False
std_og_t2t_mou_8 False
std_og_t2t_mou_9 False
std_og_t2m_mou_6 False
std_og_t2m_mou_7 False
std_og_t2m_mou_8 False
std_og_t2m_mou_9 False
std_og_t2f_mou_6 False
std_og_t2f_mou_7 False
std_og_t2f_mou_8 False
std_og_t2f_mou_9 False
std_og_t2c_mou_6 False
std_og_t2c_mou_7 False
std_og_t2c_mou_8 False
std_og_t2c_mou_9 False
std_og_mou_6 False
std_og_mou_7 False
std_og_mou_8 False
std_og_mou_9 False
isd_og_mou_6 False
isd_og_mou_7 False
isd_og_mou_8 False
isd_og_mou_9 False
spl_og_mou_6 False
spl_og_mou_7 False
spl_og_mou_8 False
spl_og_mou_9 False
og_others_6 False
og_others_7 False
og_others_8 False
og_others_9 False
total_og_mou_6 False
total_og_mou_7 False
total_og_mou_8 False
total_og_mou_9 False
loc_ic_t2t_mou_6 False
loc_ic_t2t_mou_7 False
loc_ic_t2t_mou_8 False
loc_ic_t2t_mou_9 False
loc_ic_t2m_mou_6 False
loc_ic_t2m_mou_7 False
loc_ic_t2m_mou_8 False
loc_ic_t2m_mou_9 False
loc_ic_t2f_mou_6 False
loc_ic_t2f_mou_7 False
loc_ic_t2f_mou_8 False
loc_ic_t2f_mou_9 False
loc_ic_mou_6 False
loc_ic_mou_7 False
loc_ic_mou_8 False
loc_ic_mou_9 False
std_ic_t2t_mou_6 False
std_ic_t2t_mou_7 False
std_ic_t2t_mou_8 False
std_ic_t2t_mou_9 False
std_ic_t2m_mou_6 False
std_ic_t2m_mou_7 False
std_ic_t2m_mou_8 False
std_ic_t2m_mou_9 False
std_ic_t2f_mou_6 False
std_ic_t2f_mou_7 False
std_ic_t2f_mou_8 False
std_ic_t2f_mou_9 False
std_ic_t2o_mou_6 False
std_ic_t2o_mou_7 False
std_ic_t2o_mou_8 False
std_ic_t2o_mou_9 False
std_ic_mou_6 False
std_ic_mou_7 False
std_ic_mou_8 False
std_ic_mou_9 False
total_ic_mou_6 False
total_ic_mou_7 False
total_ic_mou_8 False
total_ic_mou_9 False
spl_ic_mou_6 False
spl_ic_mou_7 False
spl_ic_mou_8 False
spl_ic_mou_9 False
isd_ic_mou_6 False
isd_ic_mou_7 False
isd_ic_mou_8 False
isd_ic_mou_9 False
ic_others_6 False
ic_others_7 False
ic_others_8 False
ic_others_9 False
total_rech_num_6 False
total_rech_num_7 False
total_rech_num_8 False
total_rech_num_9 False
total_rech_amt_6 False
total_rech_amt_7 False
total_rech_amt_8 False
total_rech_amt_9 False
max_rech_amt_6 False
max_rech_amt_7 False
max_rech_amt_8 False
max_rech_amt_9 False
date_of_last_rech_6 False
date_of_last_rech_7 False
date_of_last_rech_8 False
date_of_last_rech_9 False
last_day_rch_amt_6 False
last_day_rch_amt_7 False
last_day_rch_amt_8 False
last_day_rch_amt_9 False
date_of_last_rech_data_6 True
date_of_last_rech_data_7 True
date_of_last_rech_data_8 True
date_of_last_rech_data_9 True
total_rech_data_6 False
total_rech_data_7 False
total_rech_data_8 False
total_rech_data_9 False
max_rech_data_6 False
max_rech_data_7 False
max_rech_data_8 False
max_rech_data_9 False
count_rech_2g_6 True
count_rech_2g_7 True
count_rech_2g_8 True
count_rech_2g_9 True
count_rech_3g_6 True
count_rech_3g_7 True
count_rech_3g_8 True
count_rech_3g_9 True
av_rech_amt_data_6 False
av_rech_amt_data_7 False
av_rech_amt_data_8 False
av_rech_amt_data_9 False
vol_2g_mb_6 False
vol_2g_mb_7 False
vol_2g_mb_8 False
vol_2g_mb_9 False
vol_3g_mb_6 False
vol_3g_mb_7 False
vol_3g_mb_8 False
vol_3g_mb_9 False
arpu_3g_6 True
arpu_3g_7 True
arpu_3g_8 True
arpu_3g_9 True
arpu_2g_6 True
arpu_2g_7 True
arpu_2g_8 True
arpu_2g_9 True
night_pck_user_6 False
night_pck_user_7 False
night_pck_user_8 False
night_pck_user_9 False
monthly_2g_6 False
monthly_2g_7 False
monthly_2g_8 False
monthly_2g_9 False
sachet_2g_6 False
sachet_2g_7 False
sachet_2g_8 False
sachet_2g_9 False
monthly_3g_6 False
monthly_3g_7 False
monthly_3g_8 False
monthly_3g_9 False
sachet_3g_6 False
sachet_3g_7 False
sachet_3g_8 False
sachet_3g_9 False
fb_user_6 False
fb_user_7 False
fb_user_8 False
fb_user_9 False
aon False
aug_vbc_3g False
jul_vbc_3g False
jun_vbc_3g False
sep_vbc_3g False
dtype: bool
In [0]:
j=["date_of_last_rech_data_6",'date_of_last_rech_data_7',
'date_of_last_rech_data_8','date_of_last_rech_data_9']
churn[j]=churn[j].apply(lambda x: x.fillna(0))
There are many columns with more than 70% null values These columns don't add any value to the data analysis, so lets remove them
In [471]:
#now lets remove these columns
removecolumn = churn.columns[100*(churn.isnull().sum()/len(churn.index)) > 70]
print(removecolumn)
Index(['count_rech_2g_6', 'count_rech_2g_7', 'count_rech_2g_8', 'count_rech_2g_9', 'count_rech_3g_6', 'count_rech_3g_7', 'count_rech_3g_8', 'count_rech_3g_9', 'arpu_3g_6', 'arpu_3g_7', 'arpu_3g_8', 'arpu_3g_9', 'arpu_2g_6', 'arpu_2g_7', 'arpu_2g_8', 'arpu_2g_9'], dtype='object')
In [472]:
churn=churn.drop(removecolumn,axis=1)
churn.shape
Out[472]:
(99999, 210)
In [473]:
#lets drop id columns as well
churn=churn.drop(churn_id,axis=1)
churn.shape
Out[473]:
(99999, 208)
In [474]:
#identifying columns with missing values >0
x=churn.columns[100*(churn.isnull().sum()/len(churn.index)) > 0]
x
Out[474]:
Index(['loc_og_t2o_mou', 'std_og_t2o_mou', 'loc_ic_t2o_mou', 'last_date_of_month_7', 'last_date_of_month_8', 'last_date_of_month_9', 'onnet_mou_6', 'onnet_mou_7', 'onnet_mou_8', 'onnet_mou_9',
...
'isd_ic_mou_8', 'isd_ic_mou_9', 'ic_others_6', 'ic_others_7', 'ic_others_8', 'ic_others_9', 'date_of_last_rech_6', 'date_of_last_rech_7', 'date_of_last_rech_8', 'date_of_last_rech_9'], dtype='object', length=126)
In [0]:
#imputing with 0
churn[x]=churn[x].apply(lambda x: x.fillna(0))
In [476]:
round(churn.isnull().sum()/len(churn.index),4)*100
Out[476]:
loc_og_t2o_mou 0.0
std_og_t2o_mou 0.0
loc_ic_t2o_mou 0.0
last_date_of_month_6 0.0
last_date_of_month_7 0.0
last_date_of_month_8 0.0
last_date_of_month_9 0.0
arpu_6 0.0
arpu_7 0.0
arpu_8 0.0
arpu_9 0.0
onnet_mou_6 0.0
onnet_mou_7 0.0
onnet_mou_8 0.0
onnet_mou_9 0.0
offnet_mou_6 0.0
offnet_mou_7 0.0
offnet_mou_8 0.0
offnet_mou_9 0.0
roam_ic_mou_6 0.0
roam_ic_mou_7 0.0
roam_ic_mou_8 0.0
roam_ic_mou_9 0.0
roam_og_mou_6 0.0
roam_og_mou_7 0.0
roam_og_mou_8 0.0
roam_og_mou_9 0.0
loc_og_t2t_mou_6 0.0
loc_og_t2t_mou_7 0.0
loc_og_t2t_mou_8 0.0
loc_og_t2t_mou_9 0.0
loc_og_t2m_mou_6 0.0
loc_og_t2m_mou_7 0.0
loc_og_t2m_mou_8 0.0
loc_og_t2m_mou_9 0.0
loc_og_t2f_mou_6 0.0
loc_og_t2f_mou_7 0.0
loc_og_t2f_mou_8 0.0
loc_og_t2f_mou_9 0.0
loc_og_t2c_mou_6 0.0
loc_og_t2c_mou_7 0.0
loc_og_t2c_mou_8 0.0
loc_og_t2c_mou_9 0.0
loc_og_mou_6 0.0
loc_og_mou_7 0.0
loc_og_mou_8 0.0
loc_og_mou_9 0.0
std_og_t2t_mou_6 0.0
std_og_t2t_mou_7 0.0
std_og_t2t_mou_8 0.0
std_og_t2t_mou_9 0.0
std_og_t2m_mou_6 0.0
std_og_t2m_mou_7 0.0
std_og_t2m_mou_8 0.0
std_og_t2m_mou_9 0.0
std_og_t2f_mou_6 0.0
std_og_t2f_mou_7 0.0
std_og_t2f_mou_8 0.0
std_og_t2f_mou_9 0.0
std_og_t2c_mou_6 0.0
std_og_t2c_mou_7 0.0
std_og_t2c_mou_8 0.0
std_og_t2c_mou_9 0.0
std_og_mou_6 0.0
std_og_mou_7 0.0
std_og_mou_8 0.0
std_og_mou_9 0.0
isd_og_mou_6 0.0
isd_og_mou_7 0.0
isd_og_mou_8 0.0
isd_og_mou_9 0.0
spl_og_mou_6 0.0
spl_og_mou_7 0.0
spl_og_mou_8 0.0
spl_og_mou_9 0.0
og_others_6 0.0
og_others_7 0.0
og_others_8 0.0
og_others_9 0.0
total_og_mou_6 0.0
total_og_mou_7 0.0
total_og_mou_8 0.0
total_og_mou_9 0.0
loc_ic_t2t_mou_6 0.0
loc_ic_t2t_mou_7 0.0
loc_ic_t2t_mou_8 0.0
loc_ic_t2t_mou_9 0.0
loc_ic_t2m_mou_6 0.0
loc_ic_t2m_mou_7 0.0
loc_ic_t2m_mou_8 0.0
loc_ic_t2m_mou_9 0.0
loc_ic_t2f_mou_6 0.0
loc_ic_t2f_mou_7 0.0
loc_ic_t2f_mou_8 0.0
loc_ic_t2f_mou_9 0.0
loc_ic_mou_6 0.0
loc_ic_mou_7 0.0
loc_ic_mou_8 0.0
loc_ic_mou_9 0.0
std_ic_t2t_mou_6 0.0
std_ic_t2t_mou_7 0.0
std_ic_t2t_mou_8 0.0
std_ic_t2t_mou_9 0.0
std_ic_t2m_mou_6 0.0
std_ic_t2m_mou_7 0.0
std_ic_t2m_mou_8 0.0
std_ic_t2m_mou_9 0.0
std_ic_t2f_mou_6 0.0
std_ic_t2f_mou_7 0.0
std_ic_t2f_mou_8 0.0
std_ic_t2f_mou_9 0.0
std_ic_t2o_mou_6 0.0
std_ic_t2o_mou_7 0.0
std_ic_t2o_mou_8 0.0
std_ic_t2o_mou_9 0.0
std_ic_mou_6 0.0
std_ic_mou_7 0.0
std_ic_mou_8 0.0
std_ic_mou_9 0.0
total_ic_mou_6 0.0
total_ic_mou_7 0.0
total_ic_mou_8 0.0
total_ic_mou_9 0.0
spl_ic_mou_6 0.0
spl_ic_mou_7 0.0
spl_ic_mou_8 0.0
spl_ic_mou_9 0.0
isd_ic_mou_6 0.0
isd_ic_mou_7 0.0
isd_ic_mou_8 0.0
isd_ic_mou_9 0.0
ic_others_6 0.0
ic_others_7 0.0
ic_others_8 0.0
ic_others_9 0.0
total_rech_num_6 0.0
total_rech_num_7 0.0
total_rech_num_8 0.0
total_rech_num_9 0.0
total_rech_amt_6 0.0
total_rech_amt_7 0.0
total_rech_amt_8 0.0
total_rech_amt_9 0.0
max_rech_amt_6 0.0
max_rech_amt_7 0.0
max_rech_amt_8 0.0
max_rech_amt_9 0.0
date_of_last_rech_6 0.0
date_of_last_rech_7 0.0
date_of_last_rech_8 0.0
date_of_last_rech_9 0.0
last_day_rch_amt_6 0.0
last_day_rch_amt_7 0.0
last_day_rch_amt_8 0.0
last_day_rch_amt_9 0.0
date_of_last_rech_data_6 0.0
date_of_last_rech_data_7 0.0
date_of_last_rech_data_8 0.0
date_of_last_rech_data_9 0.0
total_rech_data_6 0.0
total_rech_data_7 0.0
total_rech_data_8 0.0
total_rech_data_9 0.0
max_rech_data_6 0.0
max_rech_data_7 0.0
max_rech_data_8 0.0
max_rech_data_9 0.0
av_rech_amt_data_6 0.0
av_rech_amt_data_7 0.0
av_rech_amt_data_8 0.0
av_rech_amt_data_9 0.0
vol_2g_mb_6 0.0
vol_2g_mb_7 0.0
vol_2g_mb_8 0.0
vol_2g_mb_9 0.0
vol_3g_mb_6 0.0
vol_3g_mb_7 0.0
vol_3g_mb_8 0.0
vol_3g_mb_9 0.0
night_pck_user_6 0.0
night_pck_user_7 0.0
night_pck_user_8 0.0
night_pck_user_9 0.0
monthly_2g_6 0.0
monthly_2g_7 0.0
monthly_2g_8 0.0
monthly_2g_9 0.0
sachet_2g_6 0.0
sachet_2g_7 0.0
sachet_2g_8 0.0
sachet_2g_9 0.0
monthly_3g_6 0.0
monthly_3g_7 0.0
monthly_3g_8 0.0
monthly_3g_9 0.0
sachet_3g_6 0.0
sachet_3g_7 0.0
sachet_3g_8 0.0
sachet_3g_9 0.0
fb_user_6 0.0
fb_user_7 0.0
fb_user_8 0.0
fb_user_9 0.0
aon 0.0
aug_vbc_3g 0.0
jul_vbc_3g 0.0
jun_vbc_3g 0.0
sep_vbc_3g 0.0
dtype: float64
In [477]:
churn.nunique()
Out[477]:
loc_og_t2o_mou 1
std_og_t2o_mou 1
loc_ic_t2o_mou 1
last_date_of_month_6 1
last_date_of_month_7 2
last_date_of_month_8 2
last_date_of_month_9 2
arpu_6 85681
arpu_7 85308
arpu_8 83615
arpu_9 79937
onnet_mou_6 24313
onnet_mou_7 24336
onnet_mou_8 24089
onnet_mou_9 23565
offnet_mou_6 31140
offnet_mou_7 31023
offnet_mou_8 30908
offnet_mou_9 30077
roam_ic_mou_6 6512
roam_ic_mou_7 5230
roam_ic_mou_8 5315
roam_ic_mou_9 4827
roam_og_mou_6 8038
roam_og_mou_7 6639
roam_og_mou_8 6504
roam_og_mou_9 5882
loc_og_t2t_mou_6 13539
loc_og_t2t_mou_7 13411
loc_og_t2t_mou_8 13336
loc_og_t2t_mou_9 12949
loc_og_t2m_mou_6 20905
loc_og_t2m_mou_7 20637
loc_og_t2m_mou_8 20544
loc_og_t2m_mou_9 20141
loc_og_t2f_mou_6 3860
loc_og_t2f_mou_7 3863
loc_og_t2f_mou_8 3807
loc_og_t2f_mou_9 3758
loc_og_t2c_mou_6 2235
loc_og_t2c_mou_7 2426
loc_og_t2c_mou_8 2516
loc_og_t2c_mou_9 2332
loc_og_mou_6 26372
loc_og_mou_7 26091
loc_og_mou_8 25990
loc_og_mou_9 25376
std_og_t2t_mou_6 18244
std_og_t2t_mou_7 18567
std_og_t2t_mou_8 18291
std_og_t2t_mou_9 17934
std_og_t2m_mou_6 19734
std_og_t2m_mou_7 20018
std_og_t2m_mou_8 19786
std_og_t2m_mou_9 19052
std_og_t2f_mou_6 2450
std_og_t2f_mou_7 2391
std_og_t2f_mou_8 2333
std_og_t2f_mou_9 2295
std_og_t2c_mou_6 1
std_og_t2c_mou_7 1
std_og_t2c_mou_8 1
std_og_t2c_mou_9 1
std_og_mou_6 27502
std_og_mou_7 27951
std_og_mou_8 27491
std_og_mou_9 26553
isd_og_mou_6 1381
isd_og_mou_7 1380
isd_og_mou_8 1276
isd_og_mou_9 1255
spl_og_mou_6 3965
spl_og_mou_7 4396
spl_og_mou_8 4390
spl_og_mou_9 4095
og_others_6 1018
og_others_7 187
og_others_8 216
og_others_9 235
total_og_mou_6 40327
total_og_mou_7 40477
total_og_mou_8 40074
total_og_mou_9 39160
loc_ic_t2t_mou_6 13540
loc_ic_t2t_mou_7 13511
loc_ic_t2t_mou_8 13346
loc_ic_t2t_mou_9 12993
loc_ic_t2m_mou_6 22065
loc_ic_t2m_mou_7 21918
loc_ic_t2m_mou_8 21886
loc_ic_t2m_mou_9 21484
loc_ic_t2f_mou_6 7250
loc_ic_t2f_mou_7 7395
loc_ic_t2f_mou_8 7097
loc_ic_t2f_mou_9 7091
loc_ic_mou_6 28569
loc_ic_mou_7 28390
loc_ic_mou_8 28200
loc_ic_mou_9 27697
std_ic_t2t_mou_6 6279
std_ic_t2t_mou_7 6481
std_ic_t2t_mou_8 6352
std_ic_t2t_mou_9 6157
std_ic_t2m_mou_6 9308
std_ic_t2m_mou_7 9464
std_ic_t2m_mou_8 9304
std_ic_t2m_mou_9 8933
std_ic_t2f_mou_6 3125
std_ic_t2f_mou_7 3209
std_ic_t2f_mou_8 3051
std_ic_t2f_mou_9 3090
std_ic_t2o_mou_6 1
std_ic_t2o_mou_7 1
std_ic_t2o_mou_8 1
std_ic_t2o_mou_9 1
std_ic_mou_6 11646
std_ic_mou_7 11889
std_ic_mou_8 11662
std_ic_mou_9 11266
total_ic_mou_6 32247
total_ic_mou_7 32242
total_ic_mou_8 32128
total_ic_mou_9 31260
spl_ic_mou_6 84
spl_ic_mou_7 107
spl_ic_mou_8 102
spl_ic_mou_9 384
isd_ic_mou_6 5521
isd_ic_mou_7 5789
isd_ic_mou_8 5844
isd_ic_mou_9 5557
ic_others_6 1817
ic_others_7 2002
ic_others_8 1896
ic_others_9 1923
total_rech_num_6 102
total_rech_num_7 101
total_rech_num_8 96
total_rech_num_9 97
total_rech_amt_6 2305
total_rech_amt_7 2329
total_rech_amt_8 2347
total_rech_amt_9 2304
max_rech_amt_6 202
max_rech_amt_7 183
max_rech_amt_8 213
max_rech_amt_9 201
date_of_last_rech_6 31
date_of_last_rech_7 32
date_of_last_rech_8 32
date_of_last_rech_9 31
last_day_rch_amt_6 186
last_day_rch_amt_7 173
last_day_rch_amt_8 199
last_day_rch_amt_9 185
date_of_last_rech_data_6 31
date_of_last_rech_data_7 32
date_of_last_rech_data_8 32
date_of_last_rech_data_9 31
total_rech_data_6 38
total_rech_data_7 43
total_rech_data_8 47
total_rech_data_9 38
max_rech_data_6 49
max_rech_data_7 49
max_rech_data_8 51
max_rech_data_9 51
av_rech_amt_data_6 888
av_rech_amt_data_7 962
av_rech_amt_data_8 974
av_rech_amt_data_9 946
vol_2g_mb_6 15201
vol_2g_mb_7 15114
vol_2g_mb_8 14994
vol_2g_mb_9 13919
vol_3g_mb_6 13773
vol_3g_mb_7 14519
vol_3g_mb_8 14960
vol_3g_mb_9 14472
night_pck_user_6 3
night_pck_user_7 3
night_pck_user_8 3
night_pck_user_9 3
monthly_2g_6 5
monthly_2g_7 6
monthly_2g_8 6
monthly_2g_9 5
sachet_2g_6 32
sachet_2g_7 35
sachet_2g_8 34
sachet_2g_9 32
monthly_3g_6 12
monthly_3g_7 15
monthly_3g_8 12
monthly_3g_9 11
sachet_3g_6 25
sachet_3g_7 27
sachet_3g_8 29
sachet_3g_9 27
fb_user_6 3
fb_user_7 3
fb_user_8 3
fb_user_9 3
aon 3489
aug_vbc_3g 14676
jul_vbc_3g 14162
jun_vbc_3g 13312
sep_vbc_3g 3720
dtype: int64
In [478]:
#dropping columns with only one unique value as they don'y add value
churn = churn.drop(['loc_og_t2o_mou', 'std_og_t2o_mou', 'loc_ic_t2o_mou'],axis=1)
churn.shape
Out[478]:
(99999, 205)
we can filter high value customers based on the amount of recharge they have done in the good months (6&7)
total recharge amount = data recharge + normal recharge amount
data recharge = total_rech_data*av_rech_amt_data
In [0]:
#data recharge
churn['total_data_rech_6'] = churn['total_rech_data_6'] * churn['av_rech_amt_data_6']
churn['total_data_rech_7'] = churn['total_rech_data_7'] * churn['av_rech_amt_data_7']
In [0]:
#total recharge
#Hemant : does total_rech_amt doesnt already include total_data_rech ?
churn['amt_data_6'] = churn[['total_rech_amt_6','total_data_rech_6']].sum(axis=1)
churn['amt_data_7'] = churn[['total_rech_amt_7','total_data_rech_7']].sum(axis=1)
In [481]:
#average of both months to filter the customers
churn['AVG_amt_data_6_7'] = churn[['amt_data_6','amt_data_7']].mean(axis=1)
churn[['total_rech_amt_6','total_rech_amt_7','AVG_amt_data_6_7']].quantile(np.linspace(.1, 1, 9, 0))
Out[481]:
total_rech_amt_6
total_rech_amt_7
AVG_amt_data_6_7
0.1
0.0
0.0
60.0
0.2
80.0
65.0
108.5
0.3
120.0
110.0
155.0
0.4
170.0
160.0
209.5
0.5
230.0
220.0
275.0
0.6
290.0
283.0
360.0
0.7
378.0
370.0
478.0
0.8
504.0
500.0
656.5
0.9
732.0
731.0
1028.0
In [482]:
churn_highvalue = churn.loc[churn.AVG_amt_data_6_7 >= churn.AVG_amt_data_6_7.quantile(0.7), :]
churn_highvalue = churn_highvalue.reset_index(drop=True)
churn_highvalue.shape
Out[482]:
(30001, 210)
customers whose outgoing and incoming calls minutes = 0
and useage of data 2g and 3g is 0, those customers are tagged as churned or 1
In [0]:
#tagging churn vs non churn
# Hemant , changed to AND
churn_highvalue['churn_tag'] = np.where(
((churn_highvalue['total_ic_mou_9'] == 0.00) & (churn_highvalue['total_og_mou_9'] == 0.00))
& ((churn_highvalue['vol_2g_mb_9'] == 0.00) & (churn_highvalue['vol_3g_mb_9'] == 0.00))
, 1, 0)
In [484]:
# change data type to category
churn_highvalue.churn_tag = churn_highvalue.churn_tag.astype("category")
# print churn ratio
print("Churn Ratio:")
print(churn_highvalue.churn_tag.value_counts()*100/churn_highvalue.shape[0])
Churn Ratio:
0 91.863605
1 8.136395
Name: churn_tag, dtype: float64
Before proceeding lets drop all the 9th month columns and, the columns of 6,7 that are not required
In [0]:
#removing all the 9th month columns
churn_highvalue = churn_highvalue.drop(churn_highvalue.filter(regex='_9|sep', axis = 1).columns, axis=1)
In [486]:
# delete variables created to filter high-value customers
churn_highvalue = churn_highvalue.drop(['total_data_rech_6', 'total_data_rech_7',
'amt_data_6', 'amt_data_7', 'AVG_amt_data_6_7'], axis=1)
churn_highvalue.shape
Out[486]:
(30001, 155)
In [487]:
round((churn_highvalue.shape[0]/churn.shape[0]),4)*100
Out[487]:
30.0
30% of customers are high value
In [488]:
churn_highvalue.info(verbose="true")
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30001 entries, 0 to 30000
Data columns (total 155 columns):
# Column Dtype
--- ------ -----
0 last_date_of_month_6 object
1 last_date_of_month_7 object
2 last_date_of_month_8 object
3 arpu_6 float64
4 arpu_7 float64
5 arpu_8 float64
6 onnet_mou_6 float64
7 onnet_mou_7 float64
8 onnet_mou_8 float64
9 offnet_mou_6 float64
10 offnet_mou_7 float64
11 offnet_mou_8 float64
12 roam_ic_mou_6 float64
13 roam_ic_mou_7 float64
14 roam_ic_mou_8 float64
15 roam_og_mou_6 float64
16 roam_og_mou_7 float64
17 roam_og_mou_8 float64
18 loc_og_t2t_mou_6 float64
19 loc_og_t2t_mou_7 float64
20 loc_og_t2t_mou_8 float64
21 loc_og_t2m_mou_6 float64
22 loc_og_t2m_mou_7 float64
23 loc_og_t2m_mou_8 float64
24 loc_og_t2f_mou_6 float64
25 loc_og_t2f_mou_7 float64
26 loc_og_t2f_mou_8 float64
27 loc_og_t2c_mou_6 float64
28 loc_og_t2c_mou_7 float64
29 loc_og_t2c_mou_8 float64
30 loc_og_mou_6 float64
31 loc_og_mou_7 float64
32 loc_og_mou_8 float64
33 std_og_t2t_mou_6 float64
34 std_og_t2t_mou_7 float64
35 std_og_t2t_mou_8 float64
36 std_og_t2m_mou_6 float64
37 std_og_t2m_mou_7 float64
38 std_og_t2m_mou_8 float64
39 std_og_t2f_mou_6 float64
40 std_og_t2f_mou_7 float64
41 std_og_t2f_mou_8 float64
42 std_og_t2c_mou_6 float64
43 std_og_t2c_mou_7 float64
44 std_og_t2c_mou_8 float64
45 std_og_mou_6 float64
46 std_og_mou_7 float64
47 std_og_mou_8 float64
48 isd_og_mou_6 float64
49 isd_og_mou_7 float64
50 isd_og_mou_8 float64
51 spl_og_mou_6 float64
52 spl_og_mou_7 float64
53 spl_og_mou_8 float64
54 og_others_6 float64
55 og_others_7 float64
56 og_others_8 float64
57 total_og_mou_6 float64
58 total_og_mou_7 float64
59 total_og_mou_8 float64
60 loc_ic_t2t_mou_6 float64
61 loc_ic_t2t_mou_7 float64
62 loc_ic_t2t_mou_8 float64
63 loc_ic_t2m_mou_6 float64
64 loc_ic_t2m_mou_7 float64
65 loc_ic_t2m_mou_8 float64
66 loc_ic_t2f_mou_6 float64
67 loc_ic_t2f_mou_7 float64
68 loc_ic_t2f_mou_8 float64
69 loc_ic_mou_6 float64
70 loc_ic_mou_7 float64
71 loc_ic_mou_8 float64
72 std_ic_t2t_mou_6 float64
73 std_ic_t2t_mou_7 float64
74 std_ic_t2t_mou_8 float64
75 std_ic_t2m_mou_6 float64
76 std_ic_t2m_mou_7 float64
77 std_ic_t2m_mou_8 float64
78 std_ic_t2f_mou_6 float64
79 std_ic_t2f_mou_7 float64
80 std_ic_t2f_mou_8 float64
81 std_ic_t2o_mou_6 float64
82 std_ic_t2o_mou_7 float64
83 std_ic_t2o_mou_8 float64
84 std_ic_mou_6 float64
85 std_ic_mou_7 float64
86 std_ic_mou_8 float64
87 total_ic_mou_6 float64
88 total_ic_mou_7 float64
89 total_ic_mou_8 float64
90 spl_ic_mou_6 float64
91 spl_ic_mou_7 float64
92 spl_ic_mou_8 float64
93 isd_ic_mou_6 float64
94 isd_ic_mou_7 float64
95 isd_ic_mou_8 float64
96 ic_others_6 float64
97 ic_others_7 float64
98 ic_others_8 float64
99 total_rech_num_6 int64
100 total_rech_num_7 int64
101 total_rech_num_8 int64
102 total_rech_amt_6 int64
103 total_rech_amt_7 int64
104 total_rech_amt_8 int64
105 max_rech_amt_6 int64
106 max_rech_amt_7 int64
107 max_rech_amt_8 int64
108 date_of_last_rech_6 object
109 date_of_last_rech_7 object
110 date_of_last_rech_8 object
111 last_day_rch_amt_6 int64
112 last_day_rch_amt_7 int64
113 last_day_rch_amt_8 int64
114 date_of_last_rech_data_6 object
115 date_of_last_rech_data_7 object
116 date_of_last_rech_data_8 object
117 total_rech_data_6 float64
118 total_rech_data_7 float64
119 total_rech_data_8 float64
120 max_rech_data_6 float64
121 max_rech_data_7 float64
122 max_rech_data_8 float64
123 av_rech_amt_data_6 float64
124 av_rech_amt_data_7 float64
125 av_rech_amt_data_8 float64
126 vol_2g_mb_6 float64
127 vol_2g_mb_7 float64
128 vol_2g_mb_8 float64
129 vol_3g_mb_6 float64
130 vol_3g_mb_7 float64
131 vol_3g_mb_8 float64
132 night_pck_user_6 float64
133 night_pck_user_7 float64
134 night_pck_user_8 float64
135 monthly_2g_6 int64
136 monthly_2g_7 int64
137 monthly_2g_8 int64
138 sachet_2g_6 int64
139 sachet_2g_7 int64
140 sachet_2g_8 int64
141 monthly_3g_6 int64
142 monthly_3g_7 int64
143 monthly_3g_8 int64
144 sachet_3g_6 int64
145 sachet_3g_7 int64
146 sachet_3g_8 int64
147 fb_user_6 float64
148 fb_user_7 float64
149 fb_user_8 float64
150 aon int64
151 aug_vbc_3g float64
152 jul_vbc_3g float64
153 jun_vbc_3g float64
154 churn_tag category
dtypes: category(1), float64(120), int64(25), object(9)
memory usage: 35.3+ MB
In [489]:
churn_v1 = churn_highvalue
churn_v1
Out[489]:
last_date_of_month_6
last_date_of_month_7
last_date_of_month_8
arpu_6
arpu_7
arpu_8
onnet_mou_6
onnet_mou_7
onnet_mou_8
offnet_mou_6
offnet_mou_7
offnet_mou_8
roam_ic_mou_6
roam_ic_mou_7
roam_ic_mou_8
roam_og_mou_6
roam_og_mou_7
roam_og_mou_8
loc_og_t2t_mou_6
loc_og_t2t_mou_7
loc_og_t2t_mou_8
loc_og_t2m_mou_6
loc_og_t2m_mou_7
loc_og_t2m_mou_8
loc_og_t2f_mou_6
loc_og_t2f_mou_7
loc_og_t2f_mou_8
loc_og_t2c_mou_6
loc_og_t2c_mou_7
loc_og_t2c_mou_8
loc_og_mou_6
loc_og_mou_7
loc_og_mou_8
std_og_t2t_mou_6
std_og_t2t_mou_7
std_og_t2t_mou_8
std_og_t2m_mou_6
std_og_t2m_mou_7
std_og_t2m_mou_8
std_og_t2f_mou_6
std_og_t2f_mou_7
std_og_t2f_mou_8
std_og_t2c_mou_6
std_og_t2c_mou_7
std_og_t2c_mou_8
std_og_mou_6
std_og_mou_7
std_og_mou_8
isd_og_mou_6
isd_og_mou_7
isd_og_mou_8
spl_og_mou_6
spl_og_mou_7
spl_og_mou_8
og_others_6
og_others_7
og_others_8
total_og_mou_6
total_og_mou_7
total_og_mou_8
loc_ic_t2t_mou_6
loc_ic_t2t_mou_7
loc_ic_t2t_mou_8
loc_ic_t2m_mou_6
loc_ic_t2m_mou_7
loc_ic_t2m_mou_8
loc_ic_t2f_mou_6
loc_ic_t2f_mou_7
loc_ic_t2f_mou_8
loc_ic_mou_6
loc_ic_mou_7
loc_ic_mou_8
std_ic_t2t_mou_6
std_ic_t2t_mou_7
std_ic_t2t_mou_8
std_ic_t2m_mou_6
std_ic_t2m_mou_7
std_ic_t2m_mou_8
std_ic_t2f_mou_6
std_ic_t2f_mou_7
std_ic_t2f_mou_8
std_ic_t2o_mou_6
std_ic_t2o_mou_7
std_ic_t2o_mou_8
std_ic_mou_6
std_ic_mou_7
std_ic_mou_8
total_ic_mou_6
total_ic_mou_7
total_ic_mou_8
spl_ic_mou_6
spl_ic_mou_7
spl_ic_mou_8
isd_ic_mou_6
isd_ic_mou_7
isd_ic_mou_8
ic_others_6
ic_others_7
ic_others_8
total_rech_num_6
total_rech_num_7
total_rech_num_8
total_rech_amt_6
total_rech_amt_7
total_rech_amt_8
max_rech_amt_6
max_rech_amt_7
max_rech_amt_8
date_of_last_rech_6
date_of_last_rech_7
date_of_last_rech_8
last_day_rch_amt_6
last_day_rch_amt_7
last_day_rch_amt_8
date_of_last_rech_data_6
date_of_last_rech_data_7
date_of_last_rech_data_8
total_rech_data_6
total_rech_data_7
total_rech_data_8
max_rech_data_6
max_rech_data_7
max_rech_data_8
av_rech_amt_data_6
av_rech_amt_data_7
av_rech_amt_data_8
vol_2g_mb_6
vol_2g_mb_7
vol_2g_mb_8
vol_3g_mb_6
vol_3g_mb_7
vol_3g_mb_8
night_pck_user_6
night_pck_user_7
night_pck_user_8
monthly_2g_6
monthly_2g_7
monthly_2g_8
sachet_2g_6
sachet_2g_7
sachet_2g_8
monthly_3g_6
monthly_3g_7
monthly_3g_8
sachet_3g_6
sachet_3g_7
sachet_3g_8
fb_user_6
fb_user_7
fb_user_8
aon
aug_vbc_3g
jul_vbc_3g
jun_vbc_3g
churn_tag
0
6/30/2014
7/31/2014
8/31/2014
197.385
214.816
213.803
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.00
0.00
0.00
0.0
0.00
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.00
0.00
0.00
0.00
0.00
0.16
0.00
0.00
4.13
0.00
0.00
1.15
0.00
0.00
5.44
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.00
0.00
0.00
0.00
0.00
5.44
0.00
0.0
0.0
0.00
0.00
0.00
0.00
0.00
0.00
4
3
2
362
252
252
252
252
252
6/21/2014
7/16/2014
8/8/2014
252
252
252
6/21/2014
7/16/2014
8/8/2014
1.0
1.0
1.0
252.0
252.0
252.0
252.0
252.0
252.0
30.13
1.32
5.75
83.57
150.76
109.61
0.0
0.0
0.0
0
0
0
0
0
0
1
1
1
0
0
0
1.0
1.0
1.0
968
30.40
0.00
101.20
1
1
6/30/2014
7/31/2014
8/31/2014
1069.180
1349.850
3171.480
57.84
54.68
52.29
453.43
567.16
325.91
16.23
33.49
31.64
23.74
12.59
38.06
51.39
31.38
40.28
308.63
447.38
162.28
62.13
55.14
53.23
0.00
0.00
0.00
422.16
533.91
255.79
4.30
23.29
12.01
49.89
31.76
49.14
6.66
20.08
16.68
0.0
0.0
0.0
60.86
75.14
77.84
0.0
0.18
10.01
4.50
0.00
6.50
0.00
0.0
0.0
487.53
609.24
350.16
58.14
32.26
27.31
217.56
221.49
121.19
152.16
101.46
39.53
427.88
355.23
188.04
36.89
11.83
30.39
91.44
126.99
141.33
52.19
34.24
22.21
0.0
0.0
0.0
180.54
173.08
193.94
626.46
558.04
428.74
0.21
0.0
0.0
2.06
14.53
31.59
15.74
15.19
15.14
5
5
7
1580
790
3638
1580
790
1580
6/27/2014
7/25/2014
8/26/2014
0
0
779
0
0
0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.00
0.00
0.00
0.00
0.00
0.00
-1.0
-1.0
-1.0
0
0
0
0
0
0
0
0
0
0
0
0
-1.0
-1.0
-1.0
802
57.74
19.38
18.74
1
2
6/30/2014
7/31/2014
8/31/2014
378.721
492.223
137.362
413.69
351.03
35.08
94.66
80.63
136.48
0.00
0.00
0.00
0.00
0.00
0.00
297.13
217.59
12.49
80.96
70.58
50.54
0.00
0.00
0.00
0.00
0.00
7.15
378.09
288.18
63.04
116.56
133.43
22.58
13.69
10.04
75.69
0.00
0.00
0.00
0.0
0.0
0.0
130.26
143.48
98.28
0.0
0.00
0.00
0.00
0.00
10.23
0.00
0.0
0.0
508.36
431.66
171.56
23.84
9.84
0.31
57.58
13.98
15.48
0.00
0.00
0.00
81.43
23.83
15.79
0.00
0.58
0.10
22.43
4.08
0.65
0.00
0.00
0.00
0.0
0.0
0.0
22.43
4.66
0.75
103.86
28.49
16.54
0.00
0.0
0.0
0.00
0.00
0.00
0.00
0.00
0.00
19
21
14
437
601
120
90
154
30
6/25/2014
7/31/2014
8/30/2014
50
0
10
0
7/31/2014
8/23/2014
0.0
2.0
3.0
0.0
154.0
23.0
0.0
177.0
69.0
0.00
356.00
0.03
0.00
750.95
11.94
-1.0
0.0
0.0
0
1
0
0
1
3
0
0
0
0
0
0
-1.0
1.0
1.0
315
21.03
910.65
122.16
0
3
6/30/2014
7/31/2014
8/31/2014
514.453
597.753
637.760
102.41
132.11
85.14
757.93
896.68
983.39
0.00
0.00
0.00
0.00
0.00
0.00
4.48
6.16
23.34
91.81
87.93
104.81
0.75
0.00
1.58
0.00
0.00
0.00
97.04
94.09
129.74
97.93
125.94
61.79
665.36
808.74
876.99
0.00
0.00
0.00
0.0
0.0
0.0
763.29
934.69
938.79
0.0
0.00
0.00
0.00
0.00
0.00
0.00
0.0
0.0
860.34
1028.79
1068.54
2.48
10.19
19.54
118.23
74.63
129.16
4.61
2.84
10.39
125.33
87.68
159.11
14.06
5.98
0.18
67.69
38.23
101.74
0.00
0.00
0.00
0.0
0.0
0.0
81.76
44.21
101.93
207.09
131.89
261.04
0.00
0.0
0.0
0.00
0.00
0.00
0.00
0.00
0.00
22
26
27
600
680
718
50
50
50
6/30/2014
7/31/2014
8/31/2014
30
20
50
0
0
0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.00
0.00
0.00
0.00
0.00
0.00
-1.0
-1.0
-1.0
0
0
0
0
0
0
0
0
0
0
0
0
-1.0
-1.0
-1.0
720
0.00
0.00
0.00
0
4
6/30/2014
7/31/2014
8/31/2014
74.350
193.897
366.966
48.96
50.66
33.58
85.41
89.36
205.89
0.00
0.00
0.00
0.00
0.00
0.00
48.96
50.66
33.58
82.94
83.01
148.56
0.00
0.00
0.00
0.00
0.00
17.71
131.91
133.68
182.14
0.00
0.00
0.00
2.36
6.35
39.61
0.00
0.00
0.00
0.0
0.0
0.0
2.36
6.35
39.61
0.0
0.01
0.00
0.10
0.00
17.71
0.00
0.0
0.0
134.38
140.04
239.48
20.71
61.04
76.64
95.91
113.36
146.84
0.00
0.00
0.71
116.63
174.41
224.21
0.51
0.00
13.38
2.43
14.89
43.91
0.00
0.00
0.00
0.0
0.0
0.0
2.94
14.89
57.29
119.58
222.89
298.33
0.00
0.0
0.0
0.00
28.23
3.74
0.00
5.35
13.06
3
5
4
0
454
439
0
179
179
6/18/2014
7/7/2014
8/24/2014
0
179
0
0
7/7/2014
8/6/2014
0.0
2.0
2.0
0.0
179.0
179.0
0.0
356.0
270.0
0.00
0.48
0.01
0.00
599.09
1009.92
-1.0
0.0
0.0
0
0
0
0
0
0
0
2
1
0
0
1
-1.0
1.0
1.0
604
40.45
51.86
0.00
0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
29996
6/30/2014
7/31/2014
8/31/2014
384.316
255.405
393.474
78.68
29.04
103.24
56.13
28.09
61.44
0.00
0.00
0.00
0.00
0.00
0.00
72.53
29.04
89.23
52.21
20.89
55.59
0.00
0.00
5.76
3.91
0.00
0.00
124.74
49.94
150.59
6.15
0.00
14.01
0.00
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.0
6.15
0.00
14.01
0.0
0.00
0.00
3.91
7.20
0.08
0.00
0.0
0.0
134.81
57.14
164.69
285.33
264.44
303.61
93.36
61.56
58.54
0.30
11.26
40.41
378.99
337.28
402.58
57.60
0.91
17.36
0.00
0.00
0.00
41.59
6.51
0.00
0.0
0.0
0.0
99.19
7.43
17.36
478.48
344.78
420.46
0.13
0.0
0.0
0.00
0.06
0.00
0.15
0.00
0.51
2
4
3
252
372
512
252
252
252
6/17/2014
7/25/2014
8/27/2014
252
0
130
6/17/2014
7/14/2014
8/23/2014
1.0
1.0
1.0
252.0
252.0
252.0
252.0
252.0
252.0
54.81
101.02
112.07
692.72
596.91
1012.70
0.0
0.0
0.0
0
0
0
0
0
0
1
1
1
0
0
0
1.0
1.0
1.0
473
1100.43
619.59
668.05
0
29997
6/30/2014
7/31/2014
8/31/2014
328.594
202.966
118.707
423.99
181.83
5.71
39.51
39.81
18.26
0.00
0.00
0.00
0.00
0.00
0.00
423.99
181.83
5.71
17.96
20.46
10.98
0.00
0.00
0.00
17.04
15.38
7.28
441.96
202.29
16.69
0.00
0.00
0.00
0.03
3.96
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.03
3.96
0.00
0.0
0.00
0.00
21.51
15.38
7.28
1.06
0.0
0.0
464.58
221.64
23.98
32.21
45.14
20.94
53.49
76.76
81.86
0.00
0.00
0.00
85.71
121.91
102.81
0.00
0.00
0.00
2.33
0.75
0.00
0.99
1.04
0.00
0.0
0.0
0.0
3.33
1.79
0.00
89.48
123.71
102.81
0.43
0.0
0.0
0.00
0.00
0.00
0.00
0.00
0.00
8
10
9
360
239
137
154
46
30
6/21/2014
7/31/2014
8/27/2014
0
25
25
6/15/2014
7/31/2014
8/27/2014
1.0
4.0
5.0
154.0
46.0
25.0
154.0
121.0
117.0
1248.50
725.05
202.22
0.00
0.00
0.00
0.0
0.0
0.0
1
0
0
0
4
5
0
0
0
0
0
0
1.0
1.0
1.0
820
0.00
0.00
0.00
0
29998
6/30/2014
7/31/2014
8/31/2014
644.973
455.228
564.334
806.73
549.36
775.41
784.76
617.13
595.44
0.00
0.00
0.00
0.00
0.00
0.00
709.21
496.14
718.56
574.93
546.84
493.48
16.28
19.48
13.01
15.96
16.71
9.15
1300.43
1062.48
1225.06
97.51
53.21
56.84
186.88
50.79
88.94
0.00
0.00
0.00
0.0
0.0
0.0
284.39
104.01
145.79
0.0
0.00
0.00
15.96
16.71
9.15
0.00
0.0
0.0
1600.79
1183.21
1380.01
140.71
104.04
148.21
395.58
475.33
450.01
29.96
38.69
37.61
566.26
618.08
635.84
2.31
0.00
0.00
12.14
1.03
23.71
0.00
2.73
0.45
0.0
0.0
0.0
14.46
3.76
24.16
580.73
622.28
660.01
0.00
0.0
0.0
0.00
0.43
0.00
0.00
0.00
0.00
6
8
3
567
1130
25
550
786
25
6/20/2014
7/28/2014
8/18/2014
550
786
0
6/17/2014
7/18/2014
8/14/2014
1.0
1.0
1.0
17.0
14.0
25.0
17.0
14.0
25.0
34.28
16.41
6.47
736.01
1129.34
926.78
0.0
0.0
0.0
0
0
0
1
1
1
0
0
0
0
0
0
1.0
1.0
1.0
2696
497.45
598.67
604.08
0
29999
6/30/2014
7/31/2014
8/31/2014
312.558
512.932
402.080
199.89
174.46
2.46
175.88
277.01
248.33
0.00
0.00
0.00
0.00
0.00
0.00
170.28
146.48
2.46
137.83
148.78
128.01
0.00
0.00
0.00
0.00
0.00
0.01
308.11
295.26
130.48
29.61
27.98
0.00
38.04
128.23
120.29
0.00
0.00
0.00
0.0
0.0
0.0
67.66
156.21
120.29
0.0
0.00
0.00
0.00
0.00
0.01
0.00
0.0
0.0
375.78
451.48
250.79
47.56
3.90
1.50
245.31
256.46
1122.83
14.43
28.39
20.31
307.31
288.76
1144.64
10.08
13.21
2.03
680.34
72.99
86.11
1.01
0.00
0.71
0.0
0.0
0.0
691.44
86.21
88.86
998.96
374.98
1233.83
0.00
0.0
0.0
0.00
0.00
0.00
0.20
0.00
0.31
12
12
9
380
554
504
50
154
154
6/30/2014
7/28/2014
8/29/2014
50
50
50
0
7/26/2014
8/23/2014
0.0
1.0
1.0
0.0
154.0
154.0
0.0
154.0
154.0
0.00
0.00
0.79
0.00
13.56
678.74
-1.0
0.0
0.0
0
1
1
0
0
0
0
0
0
0
0
0
-1.0
1.0
1.0
328
104.73
0.00
0.00
0
30000
6/30/2014
7/31/2014
8/31/2014
322.991
303.386
606.817
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.0
0.00
0.00
0.00
0.0
0.00
0.00
0.00
0.00
0.00
0.00
0.0
0.0
0.00
0.00
0.00
0.00
0.70
0.00
0.81
10.39
1.71
0.00
0.00
0.00
0.81
11.09
1.71
0.00
0.00
0.00
0.00
0.00
0.58
0.00
0.00
0.00
0.0
0.0
0.0
0.00
0.00
0.58
0.81
11.09
2.29
0.00
0.0
0.0
0.00
0.00
0.00
0.00
0.00
0.00
3
4
4
381
358
716
202
179
179
6/17/2014
7/19/2014
8/20/2014
202
179
179
6/17/2014
7/19/2014
8/20/2014
3.0
2.0
4.0
202.0
179.0
179.0
583.0
358.0
716.0
76.51
241.77
136.47
1453.63
1382.08
2683.30
0.0
0.0
0.0
0
0
0
1
0
0
1
2
4
1
0
0
1.0
1.0
1.0
478
1445.74
1151.03
1173.18
0
30001 rows × 155 columns
In [0]:
def plotCategoricalVariables(columnName,dataFrame):
sns.set(style="whitegrid")
ax = (dataFrame.groupby(columnName).size()/len(dataFrame.index)).plot.bar(color=sns.color_palette('dark', 15))
ax.set(xlabel= columnName, ylabel='Count')
plt.show()
In [491]:
plt.figure(figsize=(7,4))
plotCategoricalVariables("churn_tag", churn_v1)
In [492]:
churn_v1.info(verbose=True)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30001 entries, 0 to 30000
Data columns (total 155 columns):
# Column Dtype
--- ------ -----
0 last_date_of_month_6 object
1 last_date_of_month_7 object
2 last_date_of_month_8 object
3 arpu_6 float64
4 arpu_7 float64
5 arpu_8 float64
6 onnet_mou_6 float64
7 onnet_mou_7 float64
8 onnet_mou_8 float64
9 offnet_mou_6 float64
10 offnet_mou_7 float64
11 offnet_mou_8 float64
12 roam_ic_mou_6 float64
13 roam_ic_mou_7 float64
14 roam_ic_mou_8 float64
15 roam_og_mou_6 float64
16 roam_og_mou_7 float64
17 roam_og_mou_8 float64
18 loc_og_t2t_mou_6 float64
19 loc_og_t2t_mou_7 float64
20 loc_og_t2t_mou_8 float64
21 loc_og_t2m_mou_6 float64
22 loc_og_t2m_mou_7 float64
23 loc_og_t2m_mou_8 float64
24 loc_og_t2f_mou_6 float64
25 loc_og_t2f_mou_7 float64
26 loc_og_t2f_mou_8 float64
27 loc_og_t2c_mou_6 float64
28 loc_og_t2c_mou_7 float64
29 loc_og_t2c_mou_8 float64
30 loc_og_mou_6 float64
31 loc_og_mou_7 float64
32 loc_og_mou_8 float64
33 std_og_t2t_mou_6 float64
34 std_og_t2t_mou_7 float64
35 std_og_t2t_mou_8 float64
36 std_og_t2m_mou_6 float64
37 std_og_t2m_mou_7 float64
38 std_og_t2m_mou_8 float64
39 std_og_t2f_mou_6 float64
40 std_og_t2f_mou_7 float64
41 std_og_t2f_mou_8 float64
42 std_og_t2c_mou_6 float64
43 std_og_t2c_mou_7 float64
44 std_og_t2c_mou_8 float64
45 std_og_mou_6 float64
46 std_og_mou_7 float64
47 std_og_mou_8 float64
48 isd_og_mou_6 float64
49 isd_og_mou_7 float64
50 isd_og_mou_8 float64
51 spl_og_mou_6 float64
52 spl_og_mou_7 float64
53 spl_og_mou_8 float64
54 og_others_6 float64
55 og_others_7 float64
56 og_others_8 float64
57 total_og_mou_6 float64
58 total_og_mou_7 float64
59 total_og_mou_8 float64
60 loc_ic_t2t_mou_6 float64
61 loc_ic_t2t_mou_7 float64
62 loc_ic_t2t_mou_8 float64
63 loc_ic_t2m_mou_6 float64
64 loc_ic_t2m_mou_7 float64
65 loc_ic_t2m_mou_8 float64
66 loc_ic_t2f_mou_6 float64
67 loc_ic_t2f_mou_7 float64
68 loc_ic_t2f_mou_8 float64
69 loc_ic_mou_6 float64
70 loc_ic_mou_7 float64
71 loc_ic_mou_8 float64
72 std_ic_t2t_mou_6 float64
73 std_ic_t2t_mou_7 float64
74 std_ic_t2t_mou_8 float64
75 std_ic_t2m_mou_6 float64
76 std_ic_t2m_mou_7 float64
77 std_ic_t2m_mou_8 float64
78 std_ic_t2f_mou_6 float64
79 std_ic_t2f_mou_7 float64
80 std_ic_t2f_mou_8 float64
81 std_ic_t2o_mou_6 float64
82 std_ic_t2o_mou_7 float64
83 std_ic_t2o_mou_8 float64
84 std_ic_mou_6 float64
85 std_ic_mou_7 float64
86 std_ic_mou_8 float64
87 total_ic_mou_6 float64
88 total_ic_mou_7 float64
89 total_ic_mou_8 float64
90 spl_ic_mou_6 float64
91 spl_ic_mou_7 float64
92 spl_ic_mou_8 float64
93 isd_ic_mou_6 float64
94 isd_ic_mou_7 float64
95 isd_ic_mou_8 float64
96 ic_others_6 float64
97 ic_others_7 float64
98 ic_others_8 float64
99 total_rech_num_6 int64
100 total_rech_num_7 int64
101 total_rech_num_8 int64
102 total_rech_amt_6 int64
103 total_rech_amt_7 int64
104 total_rech_amt_8 int64
105 max_rech_amt_6 int64
106 max_rech_amt_7 int64
107 max_rech_amt_8 int64
108 date_of_last_rech_6 object
109 date_of_last_rech_7 object
110 date_of_last_rech_8 object
111 last_day_rch_amt_6 int64
112 last_day_rch_amt_7 int64
113 last_day_rch_amt_8 int64
114 date_of_last_rech_data_6 object
115 date_of_last_rech_data_7 object
116 date_of_last_rech_data_8 object
117 total_rech_data_6 float64
118 total_rech_data_7 float64
119 total_rech_data_8 float64
120 max_rech_data_6 float64
121 max_rech_data_7 float64
122 max_rech_data_8 float64
123 av_rech_amt_data_6 float64
124 av_rech_amt_data_7 float64
125 av_rech_amt_data_8 float64
126 vol_2g_mb_6 float64
127 vol_2g_mb_7 float64
128 vol_2g_mb_8 float64
129 vol_3g_mb_6 float64
130 vol_3g_mb_7 float64
131 vol_3g_mb_8 float64
132 night_pck_user_6 float64
133 night_pck_user_7 float64
134 night_pck_user_8 float64
135 monthly_2g_6 int64
136 monthly_2g_7 int64
137 monthly_2g_8 int64
138 sachet_2g_6 int64
139 sachet_2g_7 int64
140 sachet_2g_8 int64
141 monthly_3g_6 int64
142 monthly_3g_7 int64
143 monthly_3g_8 int64
144 sachet_3g_6 int64
145 sachet_3g_7 int64
146 sachet_3g_8 int64
147 fb_user_6 float64
148 fb_user_7 float64
149 fb_user_8 float64
150 aon int64
151 aug_vbc_3g float64
152 jul_vbc_3g float64
153 jun_vbc_3g float64
154 churn_tag category
dtypes: category(1), float64(120), int64(25), object(9)
memory usage: 35.3+ MB
The dataset is imbalanced , only 8% churn customers. We need class balancing.
In [0]:
#Neeed balancing by synthetic generation of new churn records
Y = churn_v1.pop("churn_tag")
X = churn_v1.select_dtypes(exclude=['object'])
In [494]:
Y.head()
Out[494]:
0 1
1 1
2 0
3 0
4 0
Name: churn_tag, dtype: category
Categories (2, int64): [0, 1]
In [0]:
# Test Train Split should happen before balancing should be applied only on train data
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, train_size=0.8,test_size=0.2,random_state=1)
Balancing the DataSet
In [496]:
#Before Class Balancing
Y_DF = pd.DataFrame(Y_train)
plotCategoricalVariables("churn_tag", Y_DF)
In [497]:
# ADASYN Balancing , ADASYN is more advanced than SMOTE
from imblearn.over_sampling import ADASYN
adaSyn = ADASYN()
X_Train_Bal, Y_Train_Bal = adaSyn.fit_sample(X_train, Y_train)
print(X_train.shape)
print(X_Train_Bal.shape)
/usr/local/lib/python3.6/dist-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function safe_indexing is deprecated; safe_indexing is deprecated in version 0.22 and will be removed in version 0.24.
warnings.warn(msg, category=FutureWarning)
(24000, 145)
(43722, 145)
In [498]:
Y_Train_Bal_DF = pd.DataFrame(Y_Train_Bal, columns = {"churn_tag"})
plotCategoricalVariables("churn_tag", Y_Train_Bal_DF)
AdaSyn Balanced Training Set
Standardization Of Data
In [0]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_Train_Bal_Standardized = scaler.fit_transform(X_Train_Bal)
X_Test_Standardized = scaler.transform(X_test)
Applying PCA
In [500]:
from sklearn.decomposition import PCA
pca = PCA(random_state=100)
pca.fit(X_Train_Bal_Standardized)
Out[500]:
PCA(copy=True, iterated_power='auto', n_components=None, random_state=100,
svd_solver='auto', tol=0.0, whiten=False)
In [501]:
pca.components_
Out[501]:
array([[ 1.23124594e-01, 1.32918417e-01, 1.09281082e-01, ...,
-7.06306859e-02, -8.16028378e-02, -7.90099206e-02],
[ 6.49853219e-02, 9.65534940e-02, 1.42321643e-01, ...,
1.23030389e-01, 1.13296255e-01, 9.77905591e-02],
[ 9.29517373e-02, 1.47201067e-01, 1.57385030e-01, ...,
1.10895432e-01, 1.10334179e-01, 9.24613249e-02],
...,
[-0.00000000e+00, 1.04511239e-16, 1.04066389e-16, ...,
2.51955741e-17, 4.26830765e-17, -4.21585639e-17],
[-0.00000000e+00, 1.99423369e-17, -4.80545229e-17, ...,
-1.11348533e-16, -1.21596755e-16, -1.41379300e-17],
[-0.00000000e+00, -8.24241638e-17, -4.83002959e-17, ...,
2.82336961e-17, 2.20256174e-17, -1.70258139e-16]])
In [502]:
pca.explained_variance_ratio_
Out[502]:
array([1.11096162e-01, 9.40039022e-02, 5.88367391e-02, 4.80784319e-02,
3.87452684e-02, 3.29300221e-02, 2.98569103e-02, 2.80631279e-02,
2.68648857e-02, 2.35067893e-02, 2.23296896e-02, 2.10839844e-02,
1.98166379e-02, 1.86055813e-02, 1.78535514e-02, 1.70829830e-02,
1.59485888e-02, 1.50291335e-02, 1.47079620e-02, 1.37674102e-02,
1.35791504e-02, 1.28076827e-02, 1.26426809e-02, 1.04149132e-02,
1.00899948e-02, 9.41386459e-03, 9.19761727e-03, 9.06832370e-03,
8.96448940e-03, 8.52577628e-03, 8.13255866e-03, 7.50156760e-03,
7.05254684e-03, 6.90174930e-03, 6.86674529e-03, 6.75575092e-03,
6.49036909e-03, 6.48010113e-03, 6.14915986e-03, 6.10364541e-03,
5.99738218e-03, 5.69626265e-03, 5.43753448e-03, 5.09305338e-03,
4.87404622e-03, 4.80485145e-03, 4.36732961e-03, 4.24018836e-03,
4.21708484e-03, 4.06962568e-03, 4.02498484e-03, 3.92082227e-03,
3.85384595e-03, 3.74068801e-03, 3.64367497e-03, 3.53286264e-03,
3.32835203e-03, 3.30233358e-03, 3.18500737e-03, 3.00272055e-03,
2.93912289e-03, 2.89800809e-03, 2.76171892e-03, 2.69014252e-03,
2.56382086e-03, 2.52873772e-03, 2.45887183e-03, 2.40748616e-03,
2.39756450e-03, 2.35177049e-03, 2.28732412e-03, 2.25033015e-03,
2.15664524e-03, 2.02567192e-03, 1.99955288e-03, 1.96574109e-03,
1.90379946e-03, 1.87639250e-03, 1.78929013e-03, 1.73071952e-03,
1.70095982e-03, 1.65556842e-03, 1.56082646e-03, 1.51594784e-03,
1.45518837e-03, 1.44026567e-03, 1.43273624e-03, 1.40090210e-03,
1.27679887e-03, 1.24597921e-03, 1.21420029e-03, 1.20326062e-03,
9.22089073e-04, 9.07572968e-04, 8.78269841e-04, 8.50618984e-04,
8.37832610e-04, 8.05568120e-04, 7.43992043e-04, 7.03474172e-04,
6.86629004e-04, 6.67611184e-04, 6.01632307e-04, 4.74260300e-04,
4.11107998e-04, 3.83107378e-04, 3.03208302e-04, 2.52920712e-04,
2.23651974e-04, 1.66524114e-04, 1.59471719e-04, 1.26793562e-04,
8.74889059e-05, 2.63965013e-05, 1.22953111e-05, 1.87290055e-06,
6.87351616e-07, 4.78085835e-07, 9.38129408e-12, 4.28420430e-12,
3.75471558e-12, 3.54353374e-12, 2.12613774e-12, 1.64922624e-12,
1.33828093e-12, 1.08202370e-12, 9.57970097e-13, 8.06462007e-13,
7.82566299e-13, 7.49389108e-13, 4.39265565e-13, 2.92777323e-13,
2.27646777e-13, 1.48111297e-13, 1.10716690e-13, 9.57235434e-14,
2.82184747e-31, 2.19098548e-32, 1.71077606e-33, 4.19989612e-34,
4.19989612e-34, 4.19989612e-34, 4.19989612e-34, 4.19989612e-34,
1.03105999e-34])
In [503]:
plt.bar(range(1,len(pca.explained_variance_ratio_)+1), pca.explained_variance_ratio_)
Out[503]:
<BarContainer object of 145 artists>
In [0]:
var_cumu = np.cumsum(pca.explained_variance_ratio_)
In [505]:
fig = plt.figure(figsize = (15,10))
plt.plot(range(1,len(var_cumu)+1), var_cumu)
Out[505]:
[<matplotlib.lines.Line2D at 0x7f6692498ef0>]
40 Features explain ~ 85 % variance and 60 features ~ 90 % , with 100 features variance explained is almost 100 %
In [0]:
# for better model , we will take 40 features
In [508]:
pca40Components = PCA(n_components = 40, random_state=100)
X_Train_Bal_Standardized_PCA40 = pca40Components.fit_transform(X_Train_Bal_Standardized)
X_Test_Standardized_PCA40 = pca40Components.transform(X_Test_Standardized)
print(X_Train_Bal_Standardized_PCA40.shape)
print(X_Test_Standardized_PCA40.shape)
(43722, 40)
(6001, 40)
Now the Data is Balanced, Standardized and of Reduced Dimensionality
We need to apply
3 Models for good Prediction
1 Model for Interpretability
In [0]:
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
In [0]:
searchParamGrid = {
'max_depth': range(5,15,3),
'min_samples_leaf': range(50, 500, 50),
'min_samples_split': range(100, 500, 100),
}
In [0]:
dtc=DecisionTreeClassifier(random_state=100)
grid_search = GridSearchCV(estimator = dtc, param_grid = searchParamGrid,
cv = 5, verbose=2, n_jobs = 4)
In [530]:
grid_search.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal )
Fitting 5 folds for each of 144 candidates, totalling 720 fits
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-530-d03a25f39490> in <module>()
----> 1 grid_search.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal )
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
708 return results
709
--> 710 self._run_search(evaluate_candidates)
711
712 # For multi-metric evaluation, store the best_index_, best_params_ and
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
1149 def _run_search(self, evaluate_candidates):
1150 """Search all candidates in param_grid"""
-> 1151 evaluate_candidates(ParameterGrid(self.param_grid))
1152
1153
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params)
687 for parameters, (train, test)
688 in product(candidate_params,
--> 689 cv.split(X, y, groups)))
690
691 if len(out) < 1:
/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in __call__(self, iterable)
1015
1016 with self._backend.retrieval_context():
-> 1017 self.retrieve()
1018 # Make sure that we get a last message telling us we are done
1019 elapsed_time = time.time() - self._start_time
/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in retrieve(self)
907 try:
908 if getattr(self._backend, 'supports_timeout', False):
--> 909 self._output.extend(job.get(timeout=self.timeout))
910 else:
911 self._output.extend(job.get())
/usr/local/lib/python3.6/dist-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
560 AsyncResults.get from multiprocessing."""
561 try:
--> 562 return future.result(timeout=timeout)
563 except LokyTimeoutError:
564 raise TimeoutError()
/usr/lib/python3.6/concurrent/futures/_base.py in result(self, timeout)
425 return self.__get_result()
426
--> 427 self._condition.wait(timeout)
428
429 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
/usr/lib/python3.6/threading.py in wait(self, timeout)
293 try: # restore state no matter what (e.g., KeyboardInterrupt)
294 if timeout is None:
--> 295 waiter.acquire()
296 gotit = True
297 else:
KeyboardInterrupt:
In [0]:
print(grid_search.best_score_)
print(grid_search.best_params_)
In [0]:
dtc_best = DecisionTreeClassifier(max_depth=11,
min_samples_leaf=50,
min_samples_split=100,
random_state=100)
In [532]:
dtc_best.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal )
Out[532]:
DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
max_depth=11, max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=50, min_samples_split=100,
min_weight_fraction_leaf=0.0, presort='deprecated',
random_state=100, splitter='best')
In [0]:
Y_TestPreds = dtc_best.predict(X_Test_Standardized_PCA40)
In [540]:
from sklearn.metrics import classification_report
print(classification_report(Y_test,Y_TestPreds))
precision recall f1-score support
0 0.97 0.80 0.88 5499
1 0.25 0.73 0.38 502
accuracy 0.80 6001
macro avg 0.61 0.77 0.63 6001
weighted avg 0.91 0.80 0.84 6001
Recall is important as we want to identify all churn cases. Using Grid Search with Decision Tree we get Test Recall of 0.73
In [0]:
GBC = GradientBoostingClassifier(random_state=100)
In [0]:
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
from xgboost import XGBClassifier
from xgboost import plot_importance
In [560]:
GBC.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal)
Out[560]:
GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
learning_rate=0.1, loss='deviance', max_depth=3,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, n_estimators=100,
n_iter_no_change=None, presort='deprecated',
random_state=100, subsample=1.0, tol=0.0001,
validation_fraction=0.1, verbose=0,
warm_start=False)
In [0]:
Y_TestPreds = GBC.predict(X_Test_Standardized_PCA40)
In [562]:
print(classification_report(Y_test,Y_TestPreds))
precision recall f1-score support
0 0.98 0.83 0.90 5499
1 0.30 0.80 0.44 502
accuracy 0.83 6001
macro avg 0.64 0.81 0.67 6001
weighted avg 0.92 0.83 0.86 6001
Without Any Tuning Gradient Boost Got Recall of .80 on Churn Detection
In [0]:
# parameter grid
param_grid = {"learning_rate": [0.07],
"subsample": [0.8],
"n_estimators" : [150, 200],
"max_depth" : [5,10]
}
In [570]:
folds = 3
grid_search_GBC = GridSearchCV(GBC,
cv = folds,
param_grid=param_grid,
scoring = 'roc_auc',
return_train_score=True,
verbose = 2,
n_jobs =5)
grid_search_GBC.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal)
Fitting 3 folds for each of 4 candidates, totalling 12 fits
[Parallel(n_jobs=5)]: Using backend LokyBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done 10 out of 12 | elapsed: 21.3min remaining: 4.3min
[Parallel(n_jobs=5)]: Done 12 out of 12 | elapsed: 24.3min finished
Out[570]:
GridSearchCV(cv=3, error_score=nan,
estimator=GradientBoostingClassifier(ccp_alpha=0.0,
criterion='friedman_mse',
init=None, learning_rate=0.1,
loss='deviance', max_depth=3,
max_features=None,
max_leaf_nodes=None,
min_impurity_decrease=0.0,
min_impurity_split=None,
min_samples_leaf=1,
min_samples_split=2,
min_weight_fraction_leaf=0.0,
n_estimators=100,
n_iter_no_change=None,
presort='deprecated',
random_state=100,
subsample=1.0, tol=0.0001,
validation_fraction=0.1,
verbose=0, warm_start=False),
iid='deprecated', n_jobs=5,
param_grid={'learning_rate': [0.07], 'max_depth': [5, 10],
'n_estimators': [150, 200], 'subsample': [0.8]},
pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
scoring='roc_auc', verbose=2)
In [571]:
print(grid_search_GBC.best_score_)
print(grid_search_GBC.best_params_)
0.9673505982867697
{'learning_rate': 0.07, 'max_depth': 10, 'n_estimators': 200, 'subsample': 0.8}
In [575]:
GBC_best = GradientBoostingClassifier(learning_rate=0.07,
max_depth=10,
n_estimators=200,
subsample =0.8,
random_state=100)
GBC_best.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal)
Out[575]:
GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
learning_rate=0.07, loss='deviance', max_depth=10,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, n_estimators=200,
n_iter_no_change=None, presort='deprecated',
random_state=100, subsample=0.8, tol=0.0001,
validation_fraction=0.1, verbose=0,
warm_start=False)
In [576]:
Y_TestPreds = GBC_best.predict(X_Test_Standardized_PCA40)
print(classification_report(Y_test,Y_TestPreds))
precision recall f1-score support
0 0.96 0.93 0.95 5499
1 0.44 0.63 0.52 502
accuracy 0.90 6001
macro avg 0.70 0.78 0.73 6001
weighted avg 0.92 0.90 0.91 6001
In [578]:
model = XGBClassifier()
model.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal)
Out[578]:
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
colsample_bynode=1, colsample_bytree=1, gamma=0,
learning_rate=0.1, max_delta_step=0, max_depth=3,
min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
nthread=None, objective='binary:logistic', random_state=0,
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
silent=None, subsample=1, verbosity=1)
In [579]:
Y_TestPreds = model.predict(X_Test_Standardized_PCA40)
print(classification_report(Y_test,Y_TestPreds))
precision recall f1-score support
0 0.98 0.83 0.90 5499
1 0.30 0.80 0.43 502
accuracy 0.83 6001
macro avg 0.64 0.82 0.67 6001
weighted avg 0.92 0.83 0.86 6001
XG Boost also gives recall of 0.80 for Churn Prediction
In [0]:
Interpretability
We use the Logistic Regression here w/o PCA to determine insights.
In [0]:
import statsmodels.api as sm
In [588]:
logm1 = sm.GLM(Y_Train_Bal,(sm.add_constant(X_Train_Bal_Standardized)), family = sm.families.Binomial())
logm1.fit().summary()
Out[588]:
Generalized Linear Model Regression Results
Dep. Variable: y No. Observations: 43722
Model: GLM Df Residuals: 43585
Model Family: Binomial Df Model: 136
Link Function: logit Scale: 1.0000
Method: IRLS Log-Likelihood: -17553.
Date: Sat, 16 May 2020 Deviance: 35106.
Time: 20:44:23 Pearson chi2: 1.08e+05
No. Iterations: 100
Covariance Type: nonrobust
coef std err z P>|z| [0.025 0.975]
const -0.2527 0.016 -15.940 0.000 -0.284 -0.222
x1 0.0748 0.057 1.310 0.190 -0.037 0.187
x2 0.2797 0.065 4.303 0.000 0.152 0.407
x3 0.8895 0.075 11.842 0.000 0.742 1.037
x4 -2.6693 1.374 -1.942 0.052 -5.363 0.025
x5 -1.5668 1.224 -1.280 0.201 -3.967 0.833
x6 2.2196 1.026 2.163 0.031 0.208 4.231
x7 -1.9961 1.383 -1.443 0.149 -4.707 0.715
x8 -2.7738 1.204 -2.304 0.021 -5.134 -0.414
x9 2.0325 1.041 1.953 0.051 -0.007 4.072
x10 0.0589 0.025 2.317 0.021 0.009 0.109
x11 0.1112 0.032 3.510 0.000 0.049 0.173
x12 -0.0609 0.022 -2.714 0.007 -0.105 -0.017
x13 0.5029 0.347 1.448 0.148 -0.178 1.183
x14 0.6968 0.278 2.505 0.012 0.152 1.242
x15 -0.6279 0.311 -2.022 0.043 -1.237 -0.019
x16 2520.5958 562.421 4.482 0.000 1418.271 3622.921
x17 -909.7252 559.318 -1.626 0.104 -2005.968 186.517
x18 3852.5977 501.264 7.686 0.000 2870.139 4835.057
x19 2922.0808 652.099 4.481 0.000 1643.990 4200.172
x20 -959.1557 589.883 -1.626 0.104 -2115.305 196.993
x21 4209.1054 547.605 7.686 0.000 3135.819 5282.392
x22 313.3706 69.901 4.483 0.000 176.367 450.374
x23 -102.6249 63.006 -1.629 0.103 -226.115 20.865
x24 336.2401 43.750 7.686 0.000 250.493 421.988
x25 -0.0875 0.019 -4.564 0.000 -0.125 -0.050
x26 0.1101 0.024 4.610 0.000 0.063 0.157
x27 0.0103 0.018 0.562 0.574 -0.026 0.046
x28 -6901.1638 1206.684 -5.719 0.000 -9266.221 -4536.107
x29 -2240.1476 1190.990 -1.881 0.060 -4574.444 94.149
x30 -2065.4335 1122.143 -1.841 0.066 -4264.793 133.926
x31 2457.6706 1622.783 1.514 0.130 -722.926 5638.267
x32 -5026.4985 1611.261 -3.120 0.002 -8184.511 -1868.486
x33 5770.8891 1374.483 4.199 0.000 3076.953 8464.826
x34 2420.7257 1598.847 1.514 0.130 -712.956 5554.408
x35 -5078.6673 1628.369 -3.119 0.002 -8270.212 -1887.123
x36 5716.5212 1361.504 4.199 0.000 3048.022 8385.020
x37 52.3530 34.560 1.515 0.130 -15.384 120.090
x38 -117.3805 37.645 -3.118 0.002 -191.164 -43.597
x39 133.1149 31.748 4.193 0.000 70.890 195.339
x40 -6.696e-09 2.37e-08 -0.282 0.778 -5.32e-08 3.98e-08
x41 7.794e-08 4.21e-08 1.850 0.064 -4.62e-09 1.6e-07
x42 6.606e-08 1.72e-08 3.830 0.000 3.23e-08 9.99e-08
x43 -8723.6759 2771.497 -3.148 0.002 -1.42e+04 -3291.643
x44 -219.4057 2856.539 -0.077 0.939 -5818.119 5379.308
x45 -490.2995 2461.996 -0.199 0.842 -5315.723 4335.124
x46 -197.2020 62.057 -3.178 0.001 -318.831 -75.573
x47 -299.3703 69.835 -4.287 0.000 -436.244 -162.496
x48 300.0779 53.643 5.594 0.000 194.940 405.216
x49 -122.8299 38.693 -3.174 0.002 -198.667 -46.993
x50 -227.6985 53.052 -4.292 0.000 -331.679 -123.718
x51 255.2508 45.610 5.596 0.000 165.857 344.645
x52 -15.4076 4.831 -3.189 0.001 -24.877 -5.939
x53 -56.2123 13.109 -4.288 0.000 -81.906 -30.519
x54 75.7086 13.525 5.598 0.000 49.200 102.217
x55 5583.6201 1756.852 3.178 0.001 2140.254 9026.986
x56 8377.7982 1951.902 4.292 0.000 4552.141 1.22e+04
x57 -9505.2792 1697.893 -5.598 0.000 -1.28e+04 -6177.471
x58 -328.2410 378.530 -0.867 0.386 -1070.147 413.665
x59 5101.1106 367.740 13.872 0.000 4380.353 5821.868
x60 5151.9386 339.358 15.181 0.000 4486.809 5817.068
x61 -461.0601 531.722 -0.867 0.386 -1503.217 581.097
x62 7310.8646 527.003 13.873 0.000 6277.958 8343.771
x63 7172.8250 472.507 15.180 0.000 6246.729 8098.921
x64 -84.9977 97.984 -0.867 0.386 -277.044 107.048
x65 1529.2090 110.241 13.872 0.000 1313.141 1745.277
x66 1408.9462 92.800 15.183 0.000 1227.062 1590.831
x67 -3821.1257 1009.410 -3.786 0.000 -5799.533 -1842.718
x68 -8427.9077 985.835 -8.549 0.000 -1.04e+04 -6495.707
x69 3069.1028 938.098 3.272 0.001 1230.465 4907.740
x70 -1336.1406 242.635 -5.507 0.000 -1811.697 -860.584
x71 -326.1085 237.298 -1.374 0.169 -791.204 138.987
x72 465.4869 204.558 2.276 0.023 64.560 866.414
x73 -1680.9542 305.245 -5.507 0.000 -2279.223 -1082.685
x74 -471.7712 343.109 -1.375 0.169 -1144.253 200.711
x75 737.0093 323.657 2.277 0.023 102.654 1371.365
x76 -286.7015 52.062 -5.507 0.000 -388.742 -184.661
x77 -70.0710 50.961 -1.375 0.169 -169.953 29.811
x78 121.2475 53.275 2.276 0.023 16.830 225.665
x79 6.224e-08 4.53e-08 1.375 0.169 -2.65e-08 1.51e-07
x80 -1.336e-07 2.74e-08 -4.877 0.000 -1.87e-07 -7.99e-08
x81 2.733e-08 2.14e-08 1.277 0.202 -1.46e-08 6.93e-08
x82 441.6462 502.479 0.879 0.379 -543.195 1426.487
x83 1581.4866 551.160 2.869 0.004 501.232 2661.741
x84 5039.0494 508.206 9.915 0.000 4042.984 6035.115
x85 5169.6516 848.769 6.091 0.000 3506.095 6833.208
x86 -2376.9208 886.317 -2.682 0.007 -4114.070 -639.772
x87 -1.604e+04 840.162 -19.087 0.000 -1.77e+04 -1.44e+04
x88 -2.4761 0.427 -5.795 0.000 -3.314 -1.639
x89 0.9658 0.399 2.418 0.016 0.183 1.749
x90 5.6240 0.309 18.202 0.000 5.018 6.230
x91 -953.2086 156.511 -6.090 0.000 -1259.965 -646.453
x92 464.1058 173.060 2.682 0.007 124.914 803.297
x93 2899.7774 151.924 19.087 0.000 2602.011 3197.544
x94 -177.7868 29.178 -6.093 0.000 -234.975 -120.599
x95 90.0054 33.571 2.681 0.007 24.207 155.803
x96 542.2828 28.409 19.089 0.000 486.603 597.963
x97 -0.0747 0.028 -2.636 0.008 -0.130 -0.019
x98 0.3021 0.035 8.616 0.000 0.233 0.371
x99 -0.3927 0.033 -12.031 0.000 -0.457 -0.329
x100 0.1258 0.063 2.010 0.044 0.003 0.249
x101 -0.4331 0.068 -6.356 0.000 -0.567 -0.300
x102 -0.6658 0.078 -8.572 0.000 -0.818 -0.514
x103 -0.1992 0.030 -6.748 0.000 -0.257 -0.141
x104 0.1457 0.029 5.096 0.000 0.090 0.202
x105 0.3939 0.032 12.226 0.000 0.331 0.457
x106 -0.0325 0.022 -1.509 0.131 -0.075 0.010
x107 -0.0373 0.020 -1.904 0.057 -0.076 0.001
x108 -0.4868 0.023 -20.813 0.000 -0.533 -0.441
x109 0.1232 0.015 7.963 0.000 0.093 0.154
x110 0.0758 0.019 4.071 0.000 0.039 0.112
x111 -0.2312 0.021 -11.207 0.000 -0.272 -0.191
x112 0.2923 0.039 7.560 0.000 0.217 0.368
x113 0.1737 0.042 4.168 0.000 0.092 0.255
x114 -0.2218 0.046 -4.820 0.000 -0.312 -0.132
x115 -0.2589 0.047 -5.540 0.000 -0.350 -0.167
x116 -0.1820 0.050 -3.662 0.000 -0.279 -0.085
x117 0.0053 0.062 0.086 0.931 -0.115 0.126
x118 0.0053 0.021 0.256 0.798 -0.036 0.046
x119 0.1221 0.023 5.207 0.000 0.076 0.168
x120 -0.2759 0.028 -10.028 0.000 -0.330 -0.222
x121 0.0103 0.033 0.314 0.754 -0.054 0.075
x122 0.0874 0.036 2.432 0.015 0.017 0.158
x123 -0.0860 0.037 -2.298 0.022 -0.159 -0.013
x124 -0.1631 0.056 -2.914 0.004 -0.273 -0.053
x125 -0.3148 0.060 -5.258 0.000 -0.432 -0.197
x126 0.3975 0.058 6.838 0.000 0.284 0.511
x127 -0.0979 0.021 -4.674 0.000 -0.139 -0.057
x128 -0.1241 0.022 -5.590 0.000 -0.168 -0.081
x129 -0.1275 0.025 -5.105 0.000 -0.176 -0.079
x130 0.1119 0.016 7.203 0.000 0.081 0.142
x131 0.0306 0.018 1.676 0.094 -0.005 0.066
x132 -0.1871 0.020 -9.430 0.000 -0.226 -0.148
x133 0.0905 0.031 2.961 0.003 0.031 0.150
x134 0.0272 0.032 0.860 0.390 -0.035 0.089
x135 -0.1365 0.037 -3.653 0.000 -0.210 -0.063
x136 0.0574 0.021 2.688 0.007 0.016 0.099
x137 0.1527 0.028 5.417 0.000 0.097 0.208
x138 -0.0847 0.027 -3.086 0.002 -0.138 -0.031
x139 0.0874 0.062 1.405 0.160 -0.035 0.209
x140 0.3018 0.067 4.538 0.000 0.171 0.432
x141 -0.6006 0.066 -9.153 0.000 -0.729 -0.472
x142 -0.1328 0.016 -8.474 0.000 -0.164 -0.102
x143 -0.1630 0.028 -5.919 0.000 -0.217 -0.109
x144 0.0805 0.028 2.918 0.004 0.026 0.135
x145 -0.0253 0.024 -1.058 0.290 -0.072 0.022
In [0]:
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(max_iter=500)
In [0]:
from sklearn.feature_selection import RFE
rfe = RFE(logreg, 40)
rfe = rfe.fit(X_Train_Bal_Standardized, Y_Train_Bal)
In [597]:
rfe.support_
Out[597]:
array([False, True, True, False, True, False, False, True, False,
False, False, False, False, True, False, False, False, True,
False, False, True, True, True, False, False, False, False,
False, False, True, False, True, False, False, False, False,
False, False, True, False, False, False, False, False, False,
False, False, False, False, False, False, False, False, False,
False, True, True, False, False, True, True, True, True,
False, False, False, False, False, True, False, True, True,
False, False, False, False, False, False, False, False, False,
False, False, True, False, False, False, False, False, True,
False, False, False, False, False, False, False, True, True,
False, False, True, False, False, True, False, False, True,
True, True, True, False, True, False, False, True, False,
False, False, True, False, False, False, False, True, True,
False, False, False, False, False, False, False, False, False,
False, False, False, False, True, True, True, True, False,
False])
In [0]:
Importance = list(zip(X_train.columns, rfe.support_, rfe.ranking_))
In [0]:
col = X_train.columns[rfe.support_]
In [0]:
Y_Pred = rfe.predict(X_Test_Standardized)
In [608]:
print(classification_report(Y_test,Y_TestPreds))
precision recall f1-score support
0 0.98 0.83 0.90 5499
1 0.30 0.80 0.43 502
accuracy 0.83 6001
macro avg 0.64 0.82 0.67 6001
weighted avg 0.92 0.83 0.86 6001
In [623]:
print(Importance)
[('arpu_6', False, 21), ('arpu_7', True, 1), ('arpu_8', True, 1), ('onnet_mou_6', False, 8), ('onnet_mou_7', True, 1), ('onnet_mou_8', False, 68), ('offnet_mou_6', False, 70), ('offnet_mou_7', True, 1), ('offnet_mou_8', False, 95), ('roam_ic_mou_6', False, 52), ('roam_ic_mou_7', False, 6), ('roam_ic_mou_8', False, 69), ('roam_og_mou_6', False, 82), ('roam_og_mou_7', True, 1), ('roam_og_mou_8', False, 90), ('loc_og_t2t_mou_6', False, 10), ('loc_og_t2t_mou_7', False, 67), ('loc_og_t2t_mou_8', True, 1), ('loc_og_t2m_mou_6', False, 40), ('loc_og_t2m_mou_7', False, 17), ('loc_og_t2m_mou_8', True, 1), ('loc_og_t2f_mou_6', True, 1), ('loc_og_t2f_mou_7', True, 1), ('loc_og_t2f_mou_8', False, 29), ('loc_og_t2c_mou_6', False, 33), ('loc_og_t2c_mou_7', False, 32), ('loc_og_t2c_mou_8', False, 100), ('loc_og_mou_6', False, 81), ('loc_og_mou_7', False, 45), ('loc_og_mou_8', True, 1), ('std_og_t2t_mou_6', False, 44), ('std_og_t2t_mou_7', True, 1), ('std_og_t2t_mou_8', False, 49), ('std_og_t2m_mou_6', False, 9), ('std_og_t2m_mou_7', False, 28), ('std_og_t2m_mou_8', False, 57), ('std_og_t2f_mou_6', False, 83), ('std_og_t2f_mou_7', False, 46), ('std_og_t2f_mou_8', True, 1), ('std_og_t2c_mou_6', False, 106), ('std_og_t2c_mou_7', False, 101), ('std_og_t2c_mou_8', False, 102), ('std_og_mou_6', False, 7), ('std_og_mou_7', False, 18), ('std_og_mou_8', False, 16), ('isd_og_mou_6', False, 63), ('isd_og_mou_7', False, 19), ('isd_og_mou_8', False, 22), ('spl_og_mou_6', False, 35), ('spl_og_mou_7', False, 34), ('spl_og_mou_8', False, 85), ('og_others_6', False, 71), ('og_others_7', False, 61), ('og_others_8', False, 79), ('total_og_mou_6', False, 20), ('total_og_mou_7', True, 1), ('total_og_mou_8', True, 1), ('loc_ic_t2t_mou_6', False, 2), ('loc_ic_t2t_mou_7', False, 80), ('loc_ic_t2t_mou_8', True, 1), ('loc_ic_t2m_mou_6', True, 1), ('loc_ic_t2m_mou_7', True, 1), ('loc_ic_t2m_mou_8', True, 1), ('loc_ic_t2f_mou_6', False, 97), ('loc_ic_t2f_mou_7', False, 84), ('loc_ic_t2f_mou_8', False, 73), ('loc_ic_mou_6', False, 48), ('loc_ic_mou_7', False, 60), ('loc_ic_mou_8', True, 1), ('std_ic_t2t_mou_6', False, 47), ('std_ic_t2t_mou_7', True, 1), ('std_ic_t2t_mou_8', True, 1), ('std_ic_t2m_mou_6', False, 59), ('std_ic_t2m_mou_7', False, 58), ('std_ic_t2m_mou_8', False, 65), ('std_ic_t2f_mou_6', False, 98), ('std_ic_t2f_mou_7', False, 87), ('std_ic_t2f_mou_8', False, 41), ('std_ic_t2o_mou_6', False, 104), ('std_ic_t2o_mou_7', False, 105), ('std_ic_t2o_mou_8', False, 103), ('std_ic_mou_6', False, 96), ('std_ic_mou_7', False, 89), ('std_ic_mou_8', True, 1), ('total_ic_mou_6', False, 94), ('total_ic_mou_7', False, 86), ('total_ic_mou_8', False, 30), ('spl_ic_mou_6', False, 43), ('spl_ic_mou_7', False, 42), ('spl_ic_mou_8', True, 1), ('isd_ic_mou_6', False, 78), ('isd_ic_mou_7', False, 88), ('isd_ic_mou_8', False, 53), ('ic_others_6', False, 54), ('ic_others_7', False, 74), ('ic_others_8', False, 66), ('total_rech_num_6', False, 62), ('total_rech_num_7', True, 1), ('total_rech_num_8', True, 1), ('total_rech_amt_6', False, 50), ('total_rech_amt_7', False, 15), ('total_rech_amt_8', True, 1), ('max_rech_amt_6', False, 24), ('max_rech_amt_7', False, 23), ('max_rech_amt_8', True, 1), ('last_day_rch_amt_6', False, 72), ('last_day_rch_amt_7', False, 51), ('last_day_rch_amt_8', True, 1), ('total_rech_data_6', True, 1), ('total_rech_data_7', True, 1), ('total_rech_data_8', True, 1), ('max_rech_data_6', False, 5), ('max_rech_data_7', True, 1), ('max_rech_data_8', False, 4), ('av_rech_amt_data_6', False, 12), ('av_rech_amt_data_7', True, 1), ('av_rech_amt_data_8', False, 93), ('vol_2g_mb_6', False, 99), ('vol_2g_mb_7', False, 14), ('vol_2g_mb_8', True, 1), ('vol_3g_mb_6', False, 77), ('vol_3g_mb_7', False, 64), ('vol_3g_mb_8', False, 75), ('night_pck_user_6', False, 25), ('night_pck_user_7', True, 1), ('night_pck_user_8', True, 1), ('monthly_2g_6', False, 3), ('monthly_2g_7', False, 13), ('monthly_2g_8', False, 31), ('sachet_2g_6', False, 55), ('sachet_2g_7', False, 91), ('sachet_2g_8', False, 38), ('monthly_3g_6', False, 36), ('monthly_3g_7', False, 92), ('monthly_3g_8', False, 37), ('sachet_3g_6', False, 56), ('sachet_3g_7', False, 11), ('sachet_3g_8', False, 39), ('fb_user_6', False, 26), ('fb_user_7', True, 1), ('fb_user_8', True, 1), ('aon', True, 1), ('aug_vbc_3g', True, 1), ('jul_vbc_3g', False, 27), ('jun_vbc_3g', False, 76)]
In [624]:
def sortingVal(row):
return row[2]
Importance.sort( key=sortingVal)
print (Importance)
[('arpu_7', True, 1), ('arpu_8', True, 1), ('onnet_mou_7', True, 1), ('offnet_mou_7', True, 1), ('roam_og_mou_7', True, 1), ('loc_og_t2t_mou_8', True, 1), ('loc_og_t2m_mou_8', True, 1), ('loc_og_t2f_mou_6', True, 1), ('loc_og_t2f_mou_7', True, 1), ('loc_og_mou_8', True, 1), ('std_og_t2t_mou_7', True, 1), ('std_og_t2f_mou_8', True, 1), ('total_og_mou_7', True, 1), ('total_og_mou_8', True, 1), ('loc_ic_t2t_mou_8', True, 1), ('loc_ic_t2m_mou_6', True, 1), ('loc_ic_t2m_mou_7', True, 1), ('loc_ic_t2m_mou_8', True, 1), ('loc_ic_mou_8', True, 1), ('std_ic_t2t_mou_7', True, 1), ('std_ic_t2t_mou_8', True, 1), ('std_ic_mou_8', True, 1), ('spl_ic_mou_8', True, 1), ('total_rech_num_7', True, 1), ('total_rech_num_8', True, 1), ('total_rech_amt_8', True, 1), ('max_rech_amt_8', True, 1), ('last_day_rch_amt_8', True, 1), ('total_rech_data_6', True, 1), ('total_rech_data_7', True, 1), ('total_rech_data_8', True, 1), ('max_rech_data_7', True, 1), ('av_rech_amt_data_7', True, 1), ('vol_2g_mb_8', True, 1), ('night_pck_user_7', True, 1), ('night_pck_user_8', True, 1), ('fb_user_7', True, 1), ('fb_user_8', True, 1), ('aon', True, 1), ('aug_vbc_3g', True, 1), ('loc_ic_t2t_mou_6', False, 2), ('monthly_2g_6', False, 3), ('max_rech_data_8', False, 4), ('max_rech_data_6', False, 5), ('roam_ic_mou_7', False, 6), ('std_og_mou_6', False, 7), ('onnet_mou_6', False, 8), ('std_og_t2m_mou_6', False, 9), ('loc_og_t2t_mou_6', False, 10), ('sachet_3g_7', False, 11), ('av_rech_amt_data_6', False, 12), ('monthly_2g_7', False, 13), ('vol_2g_mb_7', False, 14), ('total_rech_amt_7', False, 15), ('std_og_mou_8', False, 16), ('loc_og_t2m_mou_7', False, 17), ('std_og_mou_7', False, 18), ('isd_og_mou_7', False, 19), ('total_og_mou_6', False, 20), ('arpu_6', False, 21), ('isd_og_mou_8', False, 22), ('max_rech_amt_7', False, 23), ('max_rech_amt_6', False, 24), ('night_pck_user_6', False, 25), ('fb_user_6', False, 26), ('jul_vbc_3g', False, 27), ('std_og_t2m_mou_7', False, 28), ('loc_og_t2f_mou_8', False, 29), ('total_ic_mou_8', False, 30), ('monthly_2g_8', False, 31), ('loc_og_t2c_mou_7', False, 32), ('loc_og_t2c_mou_6', False, 33), ('spl_og_mou_7', False, 34), ('spl_og_mou_6', False, 35), ('monthly_3g_6', False, 36), ('monthly_3g_8', False, 37), ('sachet_2g_8', False, 38), ('sachet_3g_8', False, 39), ('loc_og_t2m_mou_6', False, 40), ('std_ic_t2f_mou_8', False, 41), ('spl_ic_mou_7', False, 42), ('spl_ic_mou_6', False, 43), ('std_og_t2t_mou_6', False, 44), ('loc_og_mou_7', False, 45), ('std_og_t2f_mou_7', False, 46), ('std_ic_t2t_mou_6', False, 47), ('loc_ic_mou_6', False, 48), ('std_og_t2t_mou_8', False, 49), ('total_rech_amt_6', False, 50), ('last_day_rch_amt_7', False, 51), ('roam_ic_mou_6', False, 52), ('isd_ic_mou_8', False, 53), ('ic_others_6', False, 54), ('sachet_2g_6', False, 55), ('sachet_3g_6', False, 56), ('std_og_t2m_mou_8', False, 57), ('std_ic_t2m_mou_7', False, 58), ('std_ic_t2m_mou_6', False, 59), ('loc_ic_mou_7', False, 60), ('og_others_7', False, 61), ('total_rech_num_6', False, 62), ('isd_og_mou_6', False, 63), ('vol_3g_mb_7', False, 64), ('std_ic_t2m_mou_8', False, 65), ('ic_others_8', False, 66), ('loc_og_t2t_mou_7', False, 67), ('onnet_mou_8', False, 68), ('roam_ic_mou_8', False, 69), ('offnet_mou_6', False, 70), ('og_others_6', False, 71), ('last_day_rch_amt_6', False, 72), ('loc_ic_t2f_mou_8', False, 73), ('ic_others_7', False, 74), ('vol_3g_mb_8', False, 75), ('jun_vbc_3g', False, 76), ('vol_3g_mb_6', False, 77), ('isd_ic_mou_6', False, 78), ('og_others_8', False, 79), ('loc_ic_t2t_mou_7', False, 80), ('loc_og_mou_6', False, 81), ('roam_og_mou_6', False, 82), ('std_og_t2f_mou_6', False, 83), ('loc_ic_t2f_mou_7', False, 84), ('spl_og_mou_8', False, 85), ('total_ic_mou_7', False, 86), ('std_ic_t2f_mou_7', False, 87), ('isd_ic_mou_7', False, 88), ('std_ic_mou_7', False, 89), ('roam_og_mou_8', False, 90), ('sachet_2g_7', False, 91), ('monthly_3g_7', False, 92), ('av_rech_amt_data_8', False, 93), ('total_ic_mou_6', False, 94), ('offnet_mou_8', False, 95), ('std_ic_mou_6', False, 96), ('loc_ic_t2f_mou_6', False, 97), ('std_ic_t2f_mou_6', False, 98), ('vol_2g_mb_6', False, 99), ('loc_og_t2c_mou_8', False, 100), ('std_og_t2c_mou_7', False, 101), ('std_og_t2c_mou_8', False, 102), ('std_ic_t2o_mou_8', False, 103), ('std_ic_t2o_mou_6', False, 104), ('std_ic_t2o_mou_7', False, 105), ('std_og_t2c_mou_6', False, 106)]
In [632]:
# Top 10 Predictors for Churn
for item in Importance[:10]:
print(item[0])
arpu_7
arpu_8
onnet_mou_7
offnet_mou_7
roam_og_mou_7
loc_og_t2t_mou_8
loc_og_t2m_mou_8
loc_og_t2f_mou_6
loc_og_t2f_mou_7
loc_og_mou_8
In [0]:
Content source: HemantTiwariGitHub/AndroidNDSunshineProgress
Similar notebooks: