Reading Data


In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [0]:
churn = pd.read_csv("telecom_churn_data.csv")

In [452]:
churn.head(10)


Out[452]:
mobile_number circle_id loc_og_t2o_mou std_og_t2o_mou loc_ic_t2o_mou last_date_of_month_6 last_date_of_month_7 last_date_of_month_8 last_date_of_month_9 arpu_6 arpu_7 arpu_8 arpu_9 onnet_mou_6 onnet_mou_7 onnet_mou_8 onnet_mou_9 offnet_mou_6 offnet_mou_7 offnet_mou_8 offnet_mou_9 roam_ic_mou_6 roam_ic_mou_7 roam_ic_mou_8 roam_ic_mou_9 roam_og_mou_6 roam_og_mou_7 roam_og_mou_8 roam_og_mou_9 loc_og_t2t_mou_6 loc_og_t2t_mou_7 loc_og_t2t_mou_8 loc_og_t2t_mou_9 loc_og_t2m_mou_6 loc_og_t2m_mou_7 loc_og_t2m_mou_8 loc_og_t2m_mou_9 loc_og_t2f_mou_6 loc_og_t2f_mou_7 loc_og_t2f_mou_8 loc_og_t2f_mou_9 loc_og_t2c_mou_6 loc_og_t2c_mou_7 loc_og_t2c_mou_8 loc_og_t2c_mou_9 loc_og_mou_6 loc_og_mou_7 loc_og_mou_8 loc_og_mou_9 std_og_t2t_mou_6 std_og_t2t_mou_7 std_og_t2t_mou_8 std_og_t2t_mou_9 std_og_t2m_mou_6 std_og_t2m_mou_7 std_og_t2m_mou_8 std_og_t2m_mou_9 std_og_t2f_mou_6 std_og_t2f_mou_7 std_og_t2f_mou_8 std_og_t2f_mou_9 std_og_t2c_mou_6 std_og_t2c_mou_7 std_og_t2c_mou_8 std_og_t2c_mou_9 std_og_mou_6 std_og_mou_7 std_og_mou_8 std_og_mou_9 isd_og_mou_6 isd_og_mou_7 isd_og_mou_8 isd_og_mou_9 spl_og_mou_6 spl_og_mou_7 spl_og_mou_8 spl_og_mou_9 og_others_6 og_others_7 og_others_8 og_others_9 total_og_mou_6 total_og_mou_7 total_og_mou_8 total_og_mou_9 loc_ic_t2t_mou_6 loc_ic_t2t_mou_7 loc_ic_t2t_mou_8 loc_ic_t2t_mou_9 loc_ic_t2m_mou_6 loc_ic_t2m_mou_7 loc_ic_t2m_mou_8 loc_ic_t2m_mou_9 loc_ic_t2f_mou_6 loc_ic_t2f_mou_7 loc_ic_t2f_mou_8 loc_ic_t2f_mou_9 loc_ic_mou_6 loc_ic_mou_7 loc_ic_mou_8 loc_ic_mou_9 std_ic_t2t_mou_6 std_ic_t2t_mou_7 std_ic_t2t_mou_8 std_ic_t2t_mou_9 std_ic_t2m_mou_6 std_ic_t2m_mou_7 std_ic_t2m_mou_8 std_ic_t2m_mou_9 std_ic_t2f_mou_6 std_ic_t2f_mou_7 std_ic_t2f_mou_8 std_ic_t2f_mou_9 std_ic_t2o_mou_6 std_ic_t2o_mou_7 std_ic_t2o_mou_8 std_ic_t2o_mou_9 std_ic_mou_6 std_ic_mou_7 std_ic_mou_8 std_ic_mou_9 total_ic_mou_6 total_ic_mou_7 total_ic_mou_8 total_ic_mou_9 spl_ic_mou_6 spl_ic_mou_7 spl_ic_mou_8 spl_ic_mou_9 isd_ic_mou_6 isd_ic_mou_7 isd_ic_mou_8 isd_ic_mou_9 ic_others_6 ic_others_7 ic_others_8 ic_others_9 total_rech_num_6 total_rech_num_7 total_rech_num_8 total_rech_num_9 total_rech_amt_6 total_rech_amt_7 total_rech_amt_8 total_rech_amt_9 max_rech_amt_6 max_rech_amt_7 max_rech_amt_8 max_rech_amt_9 date_of_last_rech_6 date_of_last_rech_7 date_of_last_rech_8 date_of_last_rech_9 last_day_rch_amt_6 last_day_rch_amt_7 last_day_rch_amt_8 last_day_rch_amt_9 date_of_last_rech_data_6 date_of_last_rech_data_7 date_of_last_rech_data_8 date_of_last_rech_data_9 total_rech_data_6 total_rech_data_7 total_rech_data_8 total_rech_data_9 max_rech_data_6 max_rech_data_7 max_rech_data_8 max_rech_data_9 count_rech_2g_6 count_rech_2g_7 count_rech_2g_8 count_rech_2g_9 count_rech_3g_6 count_rech_3g_7 count_rech_3g_8 count_rech_3g_9 av_rech_amt_data_6 av_rech_amt_data_7 av_rech_amt_data_8 av_rech_amt_data_9 vol_2g_mb_6 vol_2g_mb_7 vol_2g_mb_8 vol_2g_mb_9 vol_3g_mb_6 vol_3g_mb_7 vol_3g_mb_8 vol_3g_mb_9 arpu_3g_6 arpu_3g_7 arpu_3g_8 arpu_3g_9 arpu_2g_6 arpu_2g_7 arpu_2g_8 arpu_2g_9 night_pck_user_6 night_pck_user_7 night_pck_user_8 night_pck_user_9 monthly_2g_6 monthly_2g_7 monthly_2g_8 monthly_2g_9 sachet_2g_6 sachet_2g_7 sachet_2g_8 sachet_2g_9 monthly_3g_6 monthly_3g_7 monthly_3g_8 monthly_3g_9 sachet_3g_6 sachet_3g_7 sachet_3g_8 sachet_3g_9 fb_user_6 fb_user_7 fb_user_8 fb_user_9 aon aug_vbc_3g jul_vbc_3g jun_vbc_3g sep_vbc_3g
0 7000842753 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 197.385 214.816 213.803 21.100 NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.0 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.0 NaN 0.00 0.00 0.00 0.00 NaN NaN 0.16 NaN NaN NaN 4.13 NaN NaN NaN 1.15 NaN NaN NaN 5.44 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.0 NaN NaN NaN 0.00 NaN 0.00 0.00 5.44 0.00 NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN 4 3 2 6 362 252 252 0 252 252 252 0 6/21/2014 7/16/2014 8/8/2014 9/28/2014 252 252 252 0 6/21/2014 7/16/2014 8/8/2014 NaN 1.0 1.0 1.0 NaN 252.0 252.0 252.0 NaN 0.0 0.0 0.0 NaN 1.0 1.0 1.0 NaN 252.0 252.0 252.0 NaN 30.13 1.32 5.75 0.0 83.57 150.76 109.61 0.00 212.17 212.17 212.17 NaN 212.17 212.17 212.17 NaN 0.0 0.0 0.0 NaN 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1.0 1.0 1.0 NaN 968 30.40 0.00 101.20 3.58
1 7001865778 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 34.047 355.074 268.321 86.285 24.11 78.68 7.68 18.34 15.74 99.84 304.76 53.76 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 23.88 74.56 7.68 18.34 11.51 75.94 291.86 53.76 0.00 0.00 0.00 0.00 0.00 2.91 0.00 0.00 35.39 150.51 299.54 72.11 0.23 4.11 0.00 0.00 0.00 0.46 0.13 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 0.23 4.58 0.13 0.00 0.0 0.00 0.00 0.0 4.68 23.43 12.76 0.00 0.00 0.0 0.0 0.00 40.31 178.53 312.44 72.11 1.61 29.91 29.23 116.09 17.48 65.38 375.58 56.93 0.00 8.93 3.61 0.00 19.09 104.23 408.43 173.03 0.00 0.00 2.35 0.00 5.90 0.00 12.49 15.01 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 5.90 0.00 14.84 15.01 26.83 104.23 423.28 188.04 0.00 0.00 0.00 0.00 1.83 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4 9 11 5 74 384 283 121 44 154 65 50 6/29/2014 7/31/2014 8/28/2014 9/30/2014 44 23 30 0 NaN 7/25/2014 8/10/2014 NaN NaN 1.0 2.0 NaN NaN 154.0 25.0 NaN NaN 1.0 2.0 NaN NaN 0.0 0.0 NaN NaN 154.0 50.0 NaN 0.00 108.07 365.47 0.0 0.00 0.00 0.00 0.00 NaN 0.00 0.00 NaN NaN 28.61 7.60 NaN NaN 0.0 0.0 NaN 0 1 0 0 0 0 2 0 0 0 0 0 0 0 0 0 NaN 1.0 1.0 NaN 1006 0.00 0.00 0.00 0.00
2 7001625959 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 167.690 189.058 210.226 290.714 11.54 55.24 37.26 74.81 143.33 220.59 208.36 118.91 0.00 0.00 0.00 38.49 0.00 0.00 0.00 70.94 7.19 28.74 13.58 14.39 29.34 16.86 38.46 28.16 24.11 21.79 15.61 22.24 0.00 135.54 45.76 0.48 60.66 67.41 67.66 64.81 4.34 26.49 22.58 8.76 41.81 67.41 75.53 9.28 1.48 14.76 22.83 0.00 0.0 0.0 0.0 0.0 47.64 108.68 120.94 18.04 0.0 0.00 0.00 0.0 46.56 236.84 96.84 42.08 0.45 0.0 0.0 0.00 155.33 412.94 285.46 124.94 115.69 71.11 67.46 148.23 14.38 15.44 38.89 38.98 99.48 122.29 49.63 158.19 229.56 208.86 155.99 345.41 72.41 71.29 28.69 49.44 45.18 177.01 167.09 118.18 21.73 58.34 43.23 3.86 0.0 0.0 0.0 0.0 139.33 306.66 239.03 171.49 370.04 519.53 395.03 517.74 0.21 0.00 0.00 0.45 0.00 0.85 0.00 0.01 0.93 3.14 0.00 0.36 5 4 2 7 168 315 116 358 86 200 86 100 6/17/2014 7/24/2014 8/14/2014 9/29/2014 0 200 86 0 NaN NaN NaN 9/17/2014 NaN NaN NaN 1.0 NaN NaN NaN 46.0 NaN NaN NaN 1.0 NaN NaN NaN 0.0 NaN NaN NaN 46.0 0.00 0.00 0.00 0.0 0.00 0.00 0.00 8.42 NaN NaN NaN 2.84 NaN NaN NaN 0.0 NaN NaN NaN 0.0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 NaN NaN NaN 1.0 1103 0.00 0.00 4.17 0.00
3 7001204172 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 221.338 251.102 508.054 389.500 99.91 54.39 310.98 241.71 123.31 109.01 71.68 113.54 0.00 54.86 44.38 0.00 0.00 28.09 39.04 0.00 73.68 34.81 10.61 15.49 107.43 83.21 22.46 65.46 1.91 0.65 4.91 2.06 0.00 0.00 0.00 0.00 183.03 118.68 37.99 83.03 26.23 14.89 289.58 226.21 2.99 1.73 6.53 9.99 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 29.23 16.63 296.11 236.21 0.0 0.00 0.00 0.0 10.96 0.00 18.09 43.29 0.00 0.0 0.0 0.00 223.23 135.31 352.21 362.54 62.08 19.98 8.04 41.73 113.96 64.51 20.28 52.86 57.43 27.09 19.84 65.59 233.48 111.59 48.18 160.19 43.48 66.44 0.00 129.84 1.33 38.56 4.94 13.98 1.18 0.00 0.00 0.00 0.0 0.0 0.0 0.0 45.99 105.01 4.94 143.83 280.08 216.61 53.13 305.38 0.59 0.00 0.00 0.55 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.80 10 11 18 14 230 310 601 410 60 50 50 50 6/28/2014 7/31/2014 8/31/2014 9/30/2014 30 50 50 30 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN 2491 0.00 0.00 0.00 0.00
4 7000142493 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 261.636 309.876 238.174 163.426 50.31 149.44 83.89 58.78 76.96 91.88 124.26 45.81 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 50.31 149.44 83.89 58.78 67.64 91.88 124.26 37.89 0.00 0.00 0.00 1.93 0.00 0.00 0.00 0.00 117.96 241.33 208.16 98.61 0.00 0.00 0.00 0.00 9.31 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 9.31 0.00 0.00 0.00 0.0 0.00 0.00 0.0 0.00 0.00 0.00 5.98 0.00 0.0 0.0 0.00 127.28 241.33 208.16 104.59 105.68 88.49 233.81 154.56 106.84 109.54 104.13 48.24 1.50 0.00 0.00 0.00 214.03 198.04 337.94 202.81 0.00 0.00 0.86 2.31 1.93 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 1.93 0.25 0.86 2.31 216.44 198.29 338.81 205.31 0.00 0.00 0.00 0.18 0.00 0.00 0.00 0.00 0.48 0.00 0.00 0.00 5 6 3 4 196 350 287 200 56 110 110 50 6/26/2014 7/28/2014 8/9/2014 9/28/2014 50 110 110 50 6/4/2014 NaN NaN NaN 1.0 NaN NaN NaN 56.0 NaN NaN NaN 1.0 NaN NaN NaN 0.0 NaN NaN NaN 56.0 NaN NaN NaN 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.0 NaN NaN NaN 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0.0 NaN NaN NaN 1526 0.00 0.00 0.00 0.00
5 7000286308 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 50.258 58.810 83.386 170.826 50.16 43.63 85.48 138.79 19.28 13.44 14.46 46.91 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 50.16 43.63 85.48 138.79 16.39 8.83 12.38 44.78 0.00 0.00 0.00 2.13 0.00 0.00 0.00 0.00 66.56 52.46 97.86 185.71 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 2.88 4.61 2.08 0.00 0.0 0.0 0.0 0.0 2.88 4.61 2.08 0.00 0.0 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.00 69.44 57.08 99.94 185.71 28.73 30.03 56.26 68.38 49.19 57.44 62.46 84.01 0.00 0.00 0.00 0.00 77.93 87.48 118.73 152.39 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 77.03 71.06 37.93 52.03 0.0 0.0 0.0 0.0 77.03 71.06 37.93 52.03 155.39 158.76 157.13 205.39 0.43 0.21 0.23 0.53 0.00 0.00 0.00 0.00 0.00 0.00 0.23 0.43 2 2 3 3 120 0 130 130 120 0 130 130 6/19/2014 7/17/2014 8/24/2014 9/28/2014 120 0 0 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN 1471 0.00 0.00 0.00 0.00
6 7001051193 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 429.023 190.704 255.114 114.751 71.03 45.03 76.66 15.23 262.73 49.24 92.08 50.33 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 71.03 45.03 76.14 15.23 252.23 48.71 80.63 50.33 10.38 0.00 0.00 0.00 0.11 0.00 0.00 0.00 333.64 93.74 156.78 65.56 0.00 0.00 0.51 0.00 0.00 0.53 11.45 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 0.00 0.53 11.96 0.00 0.0 0.00 0.00 0.0 0.11 0.53 0.00 0.00 0.00 0.0 0.0 0.35 333.76 94.81 168.74 65.91 1857.99 1427.04 1896.43 2334.88 248.64 336.96 265.28 231.41 20.24 22.69 2.51 6.19 2126.89 1786.71 2164.23 2572.49 0.00 0.00 0.00 0.00 1.39 0.76 2.60 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 1.39 0.76 2.60 0.00 2128.41 1788.06 2167.11 2572.49 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.11 0.58 0.28 0.00 15 10 11 7 499 222 294 141 90 37 50 30 6/28/2014 7/31/2014 8/28/2014 9/28/2014 37 24 10 24 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN 1673 0.00 0.00 0.00 0.00
7 7000701601 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 1069.180 1349.850 3171.480 500.000 57.84 54.68 52.29 NaN 453.43 567.16 325.91 NaN 16.23 33.49 31.64 NaN 23.74 12.59 38.06 NaN 51.39 31.38 40.28 NaN 308.63 447.38 162.28 NaN 62.13 55.14 53.23 NaN 0.00 0.00 0.00 NaN 422.16 533.91 255.79 NaN 4.30 23.29 12.01 NaN 49.89 31.76 49.14 NaN 6.66 20.08 16.68 NaN 0.0 0.0 0.0 NaN 60.86 75.14 77.84 NaN 0.0 0.18 10.01 NaN 4.50 0.00 6.50 NaN 0.00 0.0 0.0 NaN 487.53 609.24 350.16 0.00 58.14 32.26 27.31 NaN 217.56 221.49 121.19 NaN 152.16 101.46 39.53 NaN 427.88 355.23 188.04 NaN 36.89 11.83 30.39 NaN 91.44 126.99 141.33 NaN 52.19 34.24 22.21 NaN 0.0 0.0 0.0 NaN 180.54 173.08 193.94 NaN 626.46 558.04 428.74 0.00 0.21 0.00 0.00 NaN 2.06 14.53 31.59 NaN 15.74 15.19 15.14 NaN 5 5 7 3 1580 790 3638 0 1580 790 1580 0 6/27/2014 7/25/2014 8/26/2014 9/30/2014 0 0 779 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN 802 57.74 19.38 18.74 0.00
8 7001524846 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 378.721 492.223 137.362 166.787 413.69 351.03 35.08 33.46 94.66 80.63 136.48 108.71 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 297.13 217.59 12.49 26.13 80.96 70.58 50.54 34.58 0.00 0.00 0.00 0.00 0.00 0.00 7.15 0.00 378.09 288.18 63.04 60.71 116.56 133.43 22.58 7.33 13.69 10.04 75.69 74.13 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 130.26 143.48 98.28 81.46 0.0 0.00 0.00 0.0 0.00 0.00 10.23 0.00 0.00 0.0 0.0 0.00 508.36 431.66 171.56 142.18 23.84 9.84 0.31 4.03 57.58 13.98 15.48 17.34 0.00 0.00 0.00 0.00 81.43 23.83 15.79 21.38 0.00 0.58 0.10 0.00 22.43 4.08 0.65 13.53 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 22.43 4.66 0.75 13.53 103.86 28.49 16.54 34.91 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 19 21 14 15 437 601 120 186 90 154 30 36 6/25/2014 7/31/2014 8/30/2014 9/30/2014 50 0 10 0 NaN 7/31/2014 8/23/2014 NaN NaN 2.0 3.0 NaN NaN 154.0 23.0 NaN NaN 2.0 3.0 NaN NaN 0.0 0.0 NaN NaN 177.0 69.0 NaN 0.00 356.00 0.03 0.0 0.00 750.95 11.94 0.00 NaN 0.00 19.83 NaN NaN 0.00 0.00 NaN NaN 0.0 0.0 NaN 0 1 0 0 0 1 3 0 0 0 0 0 0 0 0 0 NaN 1.0 1.0 NaN 315 21.03 910.65 122.16 0.00
9 7001864400 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 119.518 247.435 170.231 160.042 33.89 30.11 22.43 27.84 63.48 54.16 78.34 123.48 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 33.89 30.11 22.43 27.84 38.03 40.06 34.93 37.26 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 71.93 70.18 57.36 65.11 0.00 0.00 0.00 0.00 25.45 14.09 43.41 83.26 0.00 0.00 0.00 2.94 0.0 0.0 0.0 0.0 25.45 14.09 43.41 86.21 0.0 0.00 0.00 0.0 0.66 0.00 0.00 0.00 0.00 0.0 0.0 0.00 98.04 84.28 100.78 151.33 129.34 124.34 49.93 313.38 132.94 96.24 122.58 65.06 0.40 0.00 0.00 0.48 262.69 220.59 172.51 378.93 0.30 0.00 0.00 4.38 32.86 78.21 1.74 1.18 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 33.16 78.21 1.74 5.56 303.98 327.31 219.86 412.63 0.00 0.00 0.00 0.00 8.11 28.49 45.59 28.13 0.00 0.00 0.00 0.00 4 2 5 3 220 195 210 180 110 154 50 130 6/29/2014 7/23/2014 8/29/2014 9/20/2014 110 154 30 50 NaN 7/23/2014 NaN NaN NaN 1.0 NaN NaN NaN 154.0 NaN NaN NaN 1.0 NaN NaN NaN 0.0 NaN NaN NaN 154.0 NaN NaN 0.00 7.37 0.00 0.0 0.00 0.00 0.00 0.00 NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.0 NaN NaN 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 NaN 1.0 NaN NaN 902 0.00 0.00 0.00 0.00

In [0]:
#To increase the display width of the data (i.e. #columns,rows)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [454]:
churn.info(verbose=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99999 entries, 0 to 99998
Data columns (total 226 columns):
 #   Column                    Dtype  
---  ------                    -----  
 0   mobile_number             int64  
 1   circle_id                 int64  
 2   loc_og_t2o_mou            float64
 3   std_og_t2o_mou            float64
 4   loc_ic_t2o_mou            float64
 5   last_date_of_month_6      object 
 6   last_date_of_month_7      object 
 7   last_date_of_month_8      object 
 8   last_date_of_month_9      object 
 9   arpu_6                    float64
 10  arpu_7                    float64
 11  arpu_8                    float64
 12  arpu_9                    float64
 13  onnet_mou_6               float64
 14  onnet_mou_7               float64
 15  onnet_mou_8               float64
 16  onnet_mou_9               float64
 17  offnet_mou_6              float64
 18  offnet_mou_7              float64
 19  offnet_mou_8              float64
 20  offnet_mou_9              float64
 21  roam_ic_mou_6             float64
 22  roam_ic_mou_7             float64
 23  roam_ic_mou_8             float64
 24  roam_ic_mou_9             float64
 25  roam_og_mou_6             float64
 26  roam_og_mou_7             float64
 27  roam_og_mou_8             float64
 28  roam_og_mou_9             float64
 29  loc_og_t2t_mou_6          float64
 30  loc_og_t2t_mou_7          float64
 31  loc_og_t2t_mou_8          float64
 32  loc_og_t2t_mou_9          float64
 33  loc_og_t2m_mou_6          float64
 34  loc_og_t2m_mou_7          float64
 35  loc_og_t2m_mou_8          float64
 36  loc_og_t2m_mou_9          float64
 37  loc_og_t2f_mou_6          float64
 38  loc_og_t2f_mou_7          float64
 39  loc_og_t2f_mou_8          float64
 40  loc_og_t2f_mou_9          float64
 41  loc_og_t2c_mou_6          float64
 42  loc_og_t2c_mou_7          float64
 43  loc_og_t2c_mou_8          float64
 44  loc_og_t2c_mou_9          float64
 45  loc_og_mou_6              float64
 46  loc_og_mou_7              float64
 47  loc_og_mou_8              float64
 48  loc_og_mou_9              float64
 49  std_og_t2t_mou_6          float64
 50  std_og_t2t_mou_7          float64
 51  std_og_t2t_mou_8          float64
 52  std_og_t2t_mou_9          float64
 53  std_og_t2m_mou_6          float64
 54  std_og_t2m_mou_7          float64
 55  std_og_t2m_mou_8          float64
 56  std_og_t2m_mou_9          float64
 57  std_og_t2f_mou_6          float64
 58  std_og_t2f_mou_7          float64
 59  std_og_t2f_mou_8          float64
 60  std_og_t2f_mou_9          float64
 61  std_og_t2c_mou_6          float64
 62  std_og_t2c_mou_7          float64
 63  std_og_t2c_mou_8          float64
 64  std_og_t2c_mou_9          float64
 65  std_og_mou_6              float64
 66  std_og_mou_7              float64
 67  std_og_mou_8              float64
 68  std_og_mou_9              float64
 69  isd_og_mou_6              float64
 70  isd_og_mou_7              float64
 71  isd_og_mou_8              float64
 72  isd_og_mou_9              float64
 73  spl_og_mou_6              float64
 74  spl_og_mou_7              float64
 75  spl_og_mou_8              float64
 76  spl_og_mou_9              float64
 77  og_others_6               float64
 78  og_others_7               float64
 79  og_others_8               float64
 80  og_others_9               float64
 81  total_og_mou_6            float64
 82  total_og_mou_7            float64
 83  total_og_mou_8            float64
 84  total_og_mou_9            float64
 85  loc_ic_t2t_mou_6          float64
 86  loc_ic_t2t_mou_7          float64
 87  loc_ic_t2t_mou_8          float64
 88  loc_ic_t2t_mou_9          float64
 89  loc_ic_t2m_mou_6          float64
 90  loc_ic_t2m_mou_7          float64
 91  loc_ic_t2m_mou_8          float64
 92  loc_ic_t2m_mou_9          float64
 93  loc_ic_t2f_mou_6          float64
 94  loc_ic_t2f_mou_7          float64
 95  loc_ic_t2f_mou_8          float64
 96  loc_ic_t2f_mou_9          float64
 97  loc_ic_mou_6              float64
 98  loc_ic_mou_7              float64
 99  loc_ic_mou_8              float64
 100 loc_ic_mou_9              float64
 101 std_ic_t2t_mou_6          float64
 102 std_ic_t2t_mou_7          float64
 103 std_ic_t2t_mou_8          float64
 104 std_ic_t2t_mou_9          float64
 105 std_ic_t2m_mou_6          float64
 106 std_ic_t2m_mou_7          float64
 107 std_ic_t2m_mou_8          float64
 108 std_ic_t2m_mou_9          float64
 109 std_ic_t2f_mou_6          float64
 110 std_ic_t2f_mou_7          float64
 111 std_ic_t2f_mou_8          float64
 112 std_ic_t2f_mou_9          float64
 113 std_ic_t2o_mou_6          float64
 114 std_ic_t2o_mou_7          float64
 115 std_ic_t2o_mou_8          float64
 116 std_ic_t2o_mou_9          float64
 117 std_ic_mou_6              float64
 118 std_ic_mou_7              float64
 119 std_ic_mou_8              float64
 120 std_ic_mou_9              float64
 121 total_ic_mou_6            float64
 122 total_ic_mou_7            float64
 123 total_ic_mou_8            float64
 124 total_ic_mou_9            float64
 125 spl_ic_mou_6              float64
 126 spl_ic_mou_7              float64
 127 spl_ic_mou_8              float64
 128 spl_ic_mou_9              float64
 129 isd_ic_mou_6              float64
 130 isd_ic_mou_7              float64
 131 isd_ic_mou_8              float64
 132 isd_ic_mou_9              float64
 133 ic_others_6               float64
 134 ic_others_7               float64
 135 ic_others_8               float64
 136 ic_others_9               float64
 137 total_rech_num_6          int64  
 138 total_rech_num_7          int64  
 139 total_rech_num_8          int64  
 140 total_rech_num_9          int64  
 141 total_rech_amt_6          int64  
 142 total_rech_amt_7          int64  
 143 total_rech_amt_8          int64  
 144 total_rech_amt_9          int64  
 145 max_rech_amt_6            int64  
 146 max_rech_amt_7            int64  
 147 max_rech_amt_8            int64  
 148 max_rech_amt_9            int64  
 149 date_of_last_rech_6       object 
 150 date_of_last_rech_7       object 
 151 date_of_last_rech_8       object 
 152 date_of_last_rech_9       object 
 153 last_day_rch_amt_6        int64  
 154 last_day_rch_amt_7        int64  
 155 last_day_rch_amt_8        int64  
 156 last_day_rch_amt_9        int64  
 157 date_of_last_rech_data_6  object 
 158 date_of_last_rech_data_7  object 
 159 date_of_last_rech_data_8  object 
 160 date_of_last_rech_data_9  object 
 161 total_rech_data_6         float64
 162 total_rech_data_7         float64
 163 total_rech_data_8         float64
 164 total_rech_data_9         float64
 165 max_rech_data_6           float64
 166 max_rech_data_7           float64
 167 max_rech_data_8           float64
 168 max_rech_data_9           float64
 169 count_rech_2g_6           float64
 170 count_rech_2g_7           float64
 171 count_rech_2g_8           float64
 172 count_rech_2g_9           float64
 173 count_rech_3g_6           float64
 174 count_rech_3g_7           float64
 175 count_rech_3g_8           float64
 176 count_rech_3g_9           float64
 177 av_rech_amt_data_6        float64
 178 av_rech_amt_data_7        float64
 179 av_rech_amt_data_8        float64
 180 av_rech_amt_data_9        float64
 181 vol_2g_mb_6               float64
 182 vol_2g_mb_7               float64
 183 vol_2g_mb_8               float64
 184 vol_2g_mb_9               float64
 185 vol_3g_mb_6               float64
 186 vol_3g_mb_7               float64
 187 vol_3g_mb_8               float64
 188 vol_3g_mb_9               float64
 189 arpu_3g_6                 float64
 190 arpu_3g_7                 float64
 191 arpu_3g_8                 float64
 192 arpu_3g_9                 float64
 193 arpu_2g_6                 float64
 194 arpu_2g_7                 float64
 195 arpu_2g_8                 float64
 196 arpu_2g_9                 float64
 197 night_pck_user_6          float64
 198 night_pck_user_7          float64
 199 night_pck_user_8          float64
 200 night_pck_user_9          float64
 201 monthly_2g_6              int64  
 202 monthly_2g_7              int64  
 203 monthly_2g_8              int64  
 204 monthly_2g_9              int64  
 205 sachet_2g_6               int64  
 206 sachet_2g_7               int64  
 207 sachet_2g_8               int64  
 208 sachet_2g_9               int64  
 209 monthly_3g_6              int64  
 210 monthly_3g_7              int64  
 211 monthly_3g_8              int64  
 212 monthly_3g_9              int64  
 213 sachet_3g_6               int64  
 214 sachet_3g_7               int64  
 215 sachet_3g_8               int64  
 216 sachet_3g_9               int64  
 217 fb_user_6                 float64
 218 fb_user_7                 float64
 219 fb_user_8                 float64
 220 fb_user_9                 float64
 221 aon                       int64  
 222 aug_vbc_3g                float64
 223 jul_vbc_3g                float64
 224 jun_vbc_3g                float64
 225 sep_vbc_3g                float64
dtypes: float64(179), int64(35), object(12)
memory usage: 172.4+ MB

In [455]:
churn.info(verbose=False)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99999 entries, 0 to 99998
Columns: 226 entries, mobile_number to sep_vbc_3g
dtypes: float64(179), int64(35), object(12)
memory usage: 172.4+ MB

In [456]:
churn.describe(include=[np.number])


Out[456]:
mobile_number circle_id loc_og_t2o_mou std_og_t2o_mou loc_ic_t2o_mou arpu_6 arpu_7 arpu_8 arpu_9 onnet_mou_6 onnet_mou_7 onnet_mou_8 onnet_mou_9 offnet_mou_6 offnet_mou_7 offnet_mou_8 offnet_mou_9 roam_ic_mou_6 roam_ic_mou_7 roam_ic_mou_8 roam_ic_mou_9 roam_og_mou_6 roam_og_mou_7 roam_og_mou_8 roam_og_mou_9 loc_og_t2t_mou_6 loc_og_t2t_mou_7 loc_og_t2t_mou_8 loc_og_t2t_mou_9 loc_og_t2m_mou_6 loc_og_t2m_mou_7 loc_og_t2m_mou_8 loc_og_t2m_mou_9 loc_og_t2f_mou_6 loc_og_t2f_mou_7 loc_og_t2f_mou_8 loc_og_t2f_mou_9 loc_og_t2c_mou_6 loc_og_t2c_mou_7 loc_og_t2c_mou_8 loc_og_t2c_mou_9 loc_og_mou_6 loc_og_mou_7 loc_og_mou_8 loc_og_mou_9 std_og_t2t_mou_6 std_og_t2t_mou_7 std_og_t2t_mou_8 std_og_t2t_mou_9 std_og_t2m_mou_6 std_og_t2m_mou_7 std_og_t2m_mou_8 std_og_t2m_mou_9 std_og_t2f_mou_6 std_og_t2f_mou_7 std_og_t2f_mou_8 std_og_t2f_mou_9 std_og_t2c_mou_6 std_og_t2c_mou_7 std_og_t2c_mou_8 std_og_t2c_mou_9 std_og_mou_6 std_og_mou_7 std_og_mou_8 std_og_mou_9 isd_og_mou_6 isd_og_mou_7 isd_og_mou_8 isd_og_mou_9 spl_og_mou_6 spl_og_mou_7 spl_og_mou_8 spl_og_mou_9 og_others_6 og_others_7 og_others_8 og_others_9 total_og_mou_6 total_og_mou_7 total_og_mou_8 total_og_mou_9 loc_ic_t2t_mou_6 loc_ic_t2t_mou_7 loc_ic_t2t_mou_8 loc_ic_t2t_mou_9 loc_ic_t2m_mou_6 loc_ic_t2m_mou_7 loc_ic_t2m_mou_8 loc_ic_t2m_mou_9 loc_ic_t2f_mou_6 loc_ic_t2f_mou_7 loc_ic_t2f_mou_8 loc_ic_t2f_mou_9 loc_ic_mou_6 loc_ic_mou_7 loc_ic_mou_8 loc_ic_mou_9 std_ic_t2t_mou_6 std_ic_t2t_mou_7 std_ic_t2t_mou_8 std_ic_t2t_mou_9 std_ic_t2m_mou_6 std_ic_t2m_mou_7 std_ic_t2m_mou_8 std_ic_t2m_mou_9 std_ic_t2f_mou_6 std_ic_t2f_mou_7 std_ic_t2f_mou_8 std_ic_t2f_mou_9 std_ic_t2o_mou_6 std_ic_t2o_mou_7 std_ic_t2o_mou_8 std_ic_t2o_mou_9 std_ic_mou_6 std_ic_mou_7 std_ic_mou_8 std_ic_mou_9 total_ic_mou_6 total_ic_mou_7 total_ic_mou_8 total_ic_mou_9 spl_ic_mou_6 spl_ic_mou_7 spl_ic_mou_8 spl_ic_mou_9 isd_ic_mou_6 isd_ic_mou_7 isd_ic_mou_8 isd_ic_mou_9 ic_others_6 ic_others_7 ic_others_8 ic_others_9 total_rech_num_6 total_rech_num_7 total_rech_num_8 total_rech_num_9 total_rech_amt_6 total_rech_amt_7 total_rech_amt_8 total_rech_amt_9 max_rech_amt_6 max_rech_amt_7 max_rech_amt_8 max_rech_amt_9 last_day_rch_amt_6 last_day_rch_amt_7 last_day_rch_amt_8 last_day_rch_amt_9 total_rech_data_6 total_rech_data_7 total_rech_data_8 total_rech_data_9 max_rech_data_6 max_rech_data_7 max_rech_data_8 max_rech_data_9 count_rech_2g_6 count_rech_2g_7 count_rech_2g_8 count_rech_2g_9 count_rech_3g_6 count_rech_3g_7 count_rech_3g_8 count_rech_3g_9 av_rech_amt_data_6 av_rech_amt_data_7 av_rech_amt_data_8 av_rech_amt_data_9 vol_2g_mb_6 vol_2g_mb_7 vol_2g_mb_8 vol_2g_mb_9 vol_3g_mb_6 vol_3g_mb_7 vol_3g_mb_8 vol_3g_mb_9 arpu_3g_6 arpu_3g_7 arpu_3g_8 arpu_3g_9 arpu_2g_6 arpu_2g_7 arpu_2g_8 arpu_2g_9 night_pck_user_6 night_pck_user_7 night_pck_user_8 night_pck_user_9 monthly_2g_6 monthly_2g_7 monthly_2g_8 monthly_2g_9 sachet_2g_6 sachet_2g_7 sachet_2g_8 sachet_2g_9 monthly_3g_6 monthly_3g_7 monthly_3g_8 monthly_3g_9 sachet_3g_6 sachet_3g_7 sachet_3g_8 sachet_3g_9 fb_user_6 fb_user_7 fb_user_8 fb_user_9 aon aug_vbc_3g jul_vbc_3g jun_vbc_3g sep_vbc_3g
count 9.999900e+04 99999.0 98981.0 98981.0 98981.0 99999.000000 99999.000000 99999.000000 99999.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.0 96140.0 94621.0 92254.0 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 99999.000000 99999.000000 99999.000000 99999.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.0 96140.0 94621.0 92254.0 96062.000000 96140.000000 94621.000000 92254.000000 99999.000000 99999.000000 99999.000000 99999.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 96062.000000 96140.000000 94621.000000 92254.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 25153.000000 25571.000000 26339.000000 25922.000000 25153.000000 25571.000000 26339.000000 25922.00000 25153.000000 25571.000000 26339.000000 25922.000000 25153.000000 25571.000000 26339.000000 25922.000000 25153.000000 25571.000000 26339.000000 25922.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 25153.000000 25571.000000 26339.000000 25922.000000 25153.000000 25571.000000 26339.000000 25922.000000 25153.000000 25571.000000 26339.000000 25922.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000 25153.000000 25571.000000 26339.000000 25922.000000 99999.000000 99999.000000 99999.000000 99999.000000 99999.000000
mean 7.001207e+09 109.0 0.0 0.0 0.0 282.987358 278.536648 279.154731 261.645069 132.395875 133.670805 133.018098 130.302327 197.935577 197.045133 196.574803 190.337222 9.950013 7.149898 7.292981 6.343841 13.911337 9.818732 9.971890 8.555519 47.100763 46.473010 45.887806 44.584446 93.342088 91.397131 91.755128 90.463192 3.751013 3.792985 3.677991 3.655123 1.123056 1.368500 1.433821 1.232726 144.201175 141.670476 141.328209 138.709970 79.829870 83.299598 83.282673 82.342919 87.299624 90.804137 89.838390 86.276622 1.129011 1.115010 1.067792 1.042362 0.0 0.0 0.0 0.0 168.261218 175.221436 174.191498 169.664466 0.798277 0.776572 0.791247 0.723892 3.916811 4.978279 5.053769 4.412767 0.454157 0.030235 0.033372 0.047456 305.133424 310.231175 304.119513 289.279198 47.922365 47.990520 47.211362 46.281794 107.475650 107.120493 108.460515 106.155471 12.084305 12.599697 11.751834 12.173105 167.491059 167.719540 167.432575 164.619293 9.575993 10.011904 9.883921 9.432479 20.722240 21.656415 21.183211 19.620913 2.156397 2.216923 2.085004 2.173419 0.0 0.0 0.0 0.0 32.457179 33.887833 33.154735 31.229344 200.130037 202.853055 198.750783 189.214260 0.061557 0.033585 0.040361 0.163137 7.460608 8.334936 8.442001 8.063003 0.854656 1.012960 0.970800 1.017162 7.558806 7.700367 7.212912 6.893019 327.514615 322.962970 324.157122 303.345673 104.637486 104.752398 107.728207 101.943889 63.156252 59.385804 62.641716 43.901249 2.463802 2.666419 2.651999 2.441170 126.393392 126.729459 125.717301 124.94144 1.864668 2.044699 2.016288 1.781807 0.599133 0.621720 0.635711 0.659363 192.600982 200.981292 197.526489 192.734315 51.904956 51.229937 50.170154 44.719701 121.396219 128.995847 135.410689 136.056613 89.555057 89.384120 91.173849 100.264116 86.398003 85.914450 86.599478 93.712026 0.025086 0.023034 0.020844 0.015971 0.079641 0.083221 0.081001 0.068781 0.389384 0.439634 0.450075 0.393104 0.075921 0.078581 0.082941 0.086341 0.074781 0.080401 0.084501 0.084581 0.914404 0.908764 0.890808 0.860968 1219.854749 68.170248 66.839062 60.021204 3.299373
std 6.956694e+05 0.0 0.0 0.0 0.0 328.439770 338.156291 344.474791 341.998630 297.207406 308.794148 308.951589 308.477668 316.851613 325.862803 327.170662 319.396092 72.825411 73.447948 68.402466 57.137537 71.443196 58.455762 64.713221 58.438186 150.856393 155.318705 151.184830 147.995390 162.780544 157.492308 156.537048 158.681454 14.230438 14.264986 13.270996 13.457549 5.448946 7.533445 6.783335 5.619021 251.751489 248.731086 245.914311 245.934517 252.476533 263.631042 265.486090 267.184991 255.617850 269.347911 271.757783 261.407396 7.984970 8.599406 7.905971 8.261770 0.0 0.0 0.0 0.0 389.948499 408.922934 411.633049 405.138658 25.765248 25.603052 25.544471 21.310751 14.936449 20.661570 17.855111 16.328227 4.125911 2.161717 2.323464 3.635466 463.419481 480.031178 478.150031 468.980002 140.258485 145.795055 137.239552 140.130610 171.713903 169.423620 169.723759 165.492803 40.140895 42.977442 39.125379 43.840776 254.124029 256.242707 250.025523 249.845070 54.330607 57.411971 55.073186 53.376273 80.793414 86.521393 83.683565 74.913050 16.495594 16.454061 15.812580 15.978601 0.0 0.0 0.0 0.0 106.283386 113.720168 110.127008 101.982303 291.651671 298.124954 289.321094 284.823024 0.160920 0.155725 0.146147 0.527860 59.722948 65.219829 63.813098 63.505379 11.955164 12.673099 13.284348 12.381172 7.078405 7.070422 7.203753 7.096261 398.019701 408.114237 416.540455 404.588583 120.614894 124.523970 126.902505 125.375109 97.356649 95.915385 104.431816 90.809712 2.789128 3.031593 3.074987 2.516339 108.477235 109.765267 109.437851 111.36376 2.570254 2.768332 2.720132 2.214701 1.274428 1.394524 1.422827 1.411513 192.646318 196.791224 191.301305 188.400286 213.356445 212.302217 212.347892 198.653570 544.247227 541.494013 558.775335 577.394194 193.124653 195.893924 188.180936 216.291992 172.767523 176.379871 168.247852 171.384224 0.156391 0.150014 0.142863 0.125366 0.295058 0.304395 0.299568 0.278120 1.497320 1.636230 1.630263 1.347140 0.363371 0.387231 0.384947 0.384978 0.568344 0.628334 0.660234 0.650457 0.279772 0.287950 0.311885 0.345987 954.733842 267.580450 271.201856 253.938223 32.408353
min 7.000000e+09 109.0 0.0 0.0 0.0 -2258.709000 -2014.045000 -945.808000 -1899.505000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.500000 0.500000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -30.820000 -26.040000 -24.490000 -71.090000 -35.830000 -15.480000 -55.830000 -45.740000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 180.000000 0.000000 0.000000 0.000000 0.000000
25% 7.000606e+09 109.0 0.0 0.0 0.0 93.411500 86.980500 84.126000 62.685000 7.380000 6.660000 6.460000 5.330000 34.730000 32.190000 31.630000 27.130000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.660000 1.630000 1.600000 1.360000 9.880000 10.025000 9.810000 8.810000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 17.110000 17.480000 17.110000 15.560000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 44.740000 43.010000 38.580000 25.510000 2.990000 3.230000 3.280000 3.290000 17.290000 18.590000 18.930000 18.560000 0.000000 0.000000 0.000000 0.000000 30.390000 32.460000 32.740000 32.290000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 0.000000 0.000000 0.010000 0.000000 38.530000 41.190000 38.290000 32.370000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 109.000000 100.000000 90.000000 52.000000 30.000000 30.000000 30.000000 28.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 25.000000 25.000000 25.000000 25.00000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 82.000000 92.000000 87.000000 69.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 467.000000 0.000000 0.000000 0.000000 0.000000
50% 7.001205e+09 109.0 0.0 0.0 0.0 197.704000 191.640000 192.080000 176.849000 34.310000 32.330000 32.360000 29.840000 96.310000 91.735000 92.140000 87.290000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 11.910000 11.610000 11.730000 11.260000 41.030000 40.430000 40.360000 39.120000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 65.110000 63.685000 63.730000 61.840000 0.000000 0.000000 0.000000 0.000000 3.950000 3.635000 3.310000 2.500000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 11.640000 11.090000 10.410000 8.410000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 145.140000 141.530000 138.610000 125.460000 15.690000 15.740000 16.030000 15.660000 56.490000 57.080000 58.240000 56.610000 0.880000 0.930000 0.930000 0.960000 92.160000 92.550000 93.830000 91.640000 0.000000 0.000000 0.000000 0.000000 2.030000 2.040000 2.030000 1.740000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 5.890000 5.960000 5.880000 5.380000 114.740000 116.340000 114.660000 105.890000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 6.000000 6.000000 5.000000 5.000000 230.000000 220.000000 225.000000 200.000000 110.000000 110.000000 98.000000 61.000000 30.000000 30.000000 30.000000 0.000000 1.000000 1.000000 1.000000 2.000000 145.000000 145.000000 145.000000 145.00000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 154.000000 154.000000 154.000000 164.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.480000 0.420000 0.880000 2.605000 10.830000 8.810000 9.270000 14.800000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 863.000000 0.000000 0.000000 0.000000 0.000000
75% 7.001812e+09 109.0 0.0 0.0 0.0 371.060000 365.344500 369.370500 353.466500 118.740000 115.595000 115.860000 112.130000 231.860000 226.815000 228.260000 220.505000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 40.960000 39.910000 40.110000 39.280000 110.390000 107.560000 109.090000 106.810000 2.080000 2.090000 2.040000 1.940000 0.000000 0.000000 0.000000 0.000000 168.270000 164.382500 166.110000 162.225000 30.807500 31.132500 30.580000 28.230000 53.290000 54.040000 52.490000 48.560000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 144.837500 150.615000 147.940000 142.105000 0.000000 0.000000 0.000000 0.000000 2.430000 3.710000 3.990000 3.230000 0.000000 0.000000 0.000000 0.000000 372.860000 378.570000 369.900000 353.480000 46.840000 45.810000 46.290000 45.180000 132.387500 130.960000 133.930000 130.490000 8.140000 8.282500 8.110000 8.140000 208.075000 205.837500 207.280000 202.737500 4.060000 4.230000 4.080000 3.510000 15.030000 15.740000 15.360000 14.260000 0.000000 0.000000 0.000000 0.000000 0.0 0.0 0.0 0.0 26.930000 28.310000 27.710000 25.690000 251.670000 250.660000 248.990000 236.320000 0.000000 0.000000 0.000000 0.060000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 9.000000 10.000000 9.000000 9.000000 437.500000 428.000000 434.500000 415.000000 120.000000 128.000000 144.000000 144.000000 110.000000 110.000000 130.000000 50.000000 3.000000 3.000000 3.000000 3.000000 177.000000 177.000000 179.000000 179.00000 2.000000 2.000000 2.000000 2.000000 1.000000 1.000000 1.000000 1.000000 252.000000 252.000000 252.000000 252.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 122.070000 119.560000 122.070000 140.010000 122.070000 122.070000 122.070000 140.010000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1807.500000 0.000000 0.000000 0.000000 0.000000
max 7.002411e+09 109.0 0.0 0.0 0.0 27731.088000 35145.834000 33543.624000 38805.617000 7376.710000 8157.780000 10752.560000 10427.460000 8362.360000 9667.130000 14007.340000 10310.760000 13724.380000 15371.040000 13095.360000 8464.030000 3775.110000 2812.040000 5337.040000 4428.460000 6431.330000 7400.660000 10752.560000 10389.240000 4729.740000 4557.140000 4961.330000 4429.880000 1466.030000 1196.430000 928.490000 927.410000 342.860000 916.240000 502.090000 339.840000 10643.380000 7674.780000 11039.910000 11099.260000 7366.580000 8133.660000 8014.430000 9382.580000 8314.760000 9284.740000 13950.040000 10223.430000 628.560000 544.630000 516.910000 808.490000 0.0 0.0 0.0 0.0 8432.990000 10936.730000 13980.060000 11495.310000 5900.660000 5490.280000 5681.540000 4244.530000 1023.210000 2372.510000 1390.880000 1635.710000 800.890000 370.130000 394.930000 787.790000 10674.030000 11365.310000 14043.060000 11517.730000 6626.930000 9324.660000 10696.230000 10598.830000 4693.860000 4455.830000 6274.190000 5463.780000 1872.340000 1983.010000 2433.060000 4318.280000 7454.630000 9669.910000 10830.160000 10796.290000 5459.560000 5800.930000 4309.290000 3819.830000 5647.160000 6141.880000 5645.860000 5689.760000 1351.110000 1136.080000 1394.890000 1431.960000 0.0 0.0 0.0 0.0 5712.110000 6745.760000 5957.140000 5956.660000 7716.140000 9699.010000 10830.380000 10796.590000 19.760000 21.330000 16.860000 62.380000 6789.410000 5289.540000 4127.010000 5057.740000 1362.940000 1495.940000 2327.510000 1005.230000 307.000000 138.000000 196.000000 131.000000 35190.000000 40335.000000 45320.000000 37235.000000 4010.000000 4010.000000 4449.000000 3399.000000 4010.000000 4010.000000 4449.000000 3399.000000 61.000000 54.000000 60.000000 84.000000 1555.000000 1555.000000 1555.000000 1555.00000 42.000000 48.000000 44.000000 40.000000 29.000000 35.000000 45.000000 49.000000 7546.000000 4365.000000 4076.000000 4061.000000 10285.900000 7873.550000 11117.610000 8993.950000 45735.400000 28144.120000 30036.060000 39221.270000 6362.280000 4980.900000 3716.900000 13884.310000 6433.760000 4809.360000 3483.170000 3467.170000 1.000000 1.000000 1.000000 1.000000 4.000000 5.000000 5.000000 4.000000 42.000000 48.000000 44.000000 40.000000 14.000000 16.000000 16.000000 11.000000 29.000000 35.000000 41.000000 49.000000 1.000000 1.000000 1.000000 1.000000 4337.000000 12916.220000 9165.600000 11166.210000 2618.570000

In [457]:
churn.describe(include=[np.object])


Out[457]:
last_date_of_month_6 last_date_of_month_7 last_date_of_month_8 last_date_of_month_9 date_of_last_rech_6 date_of_last_rech_7 date_of_last_rech_8 date_of_last_rech_9 date_of_last_rech_data_6 date_of_last_rech_data_7 date_of_last_rech_data_8 date_of_last_rech_data_9
count 99999 99398 98899 98340 98392 98232 96377 95239 25153 25571 26339 25922
unique 1 1 1 1 30 31 31 30 30 31 31 30
top 6/30/2014 7/31/2014 8/31/2014 9/30/2014 6/30/2014 7/31/2014 8/31/2014 9/29/2014 6/30/2014 7/31/2014 8/31/2014 9/29/2014
freq 99999 99398 98899 98340 16960 17288 14706 22623 1888 1813 1998 2329

In [458]:
ObjectVars = list(churn.columns[churn.dtypes == 'object'])
print(ObjectVars)


['last_date_of_month_6', 'last_date_of_month_7', 'last_date_of_month_8', 'last_date_of_month_9', 'date_of_last_rech_6', 'date_of_last_rech_7', 'date_of_last_rech_8', 'date_of_last_rech_9', 'date_of_last_rech_data_6', 'date_of_last_rech_data_7', 'date_of_last_rech_data_8', 'date_of_last_rech_data_9']

In [0]:
#Seperating the date, ID and numerical columns
churn_date=['last_date_of_month_6',
             'last_date_of_month_7',
             'last_date_of_month_8',
             'last_date_of_month_9',
             'date_of_last_rech_6',
             'date_of_last_rech_7',
             'date_of_last_rech_8',
             'date_of_last_rech_9',
             'date_of_last_rech_data_6',
             'date_of_last_rech_data_7',
             'date_of_last_rech_data_8',
             'date_of_last_rech_data_9']
churn_id = ["mobile_number","circle_id" ]
churn_num = [col for col in churn.columns if col not in churn_date + churn_id]

In [460]:
churn_num


Out[460]:
['loc_og_t2o_mou',
 'std_og_t2o_mou',
 'loc_ic_t2o_mou',
 'arpu_6',
 'arpu_7',
 'arpu_8',
 'arpu_9',
 'onnet_mou_6',
 'onnet_mou_7',
 'onnet_mou_8',
 'onnet_mou_9',
 'offnet_mou_6',
 'offnet_mou_7',
 'offnet_mou_8',
 'offnet_mou_9',
 'roam_ic_mou_6',
 'roam_ic_mou_7',
 'roam_ic_mou_8',
 'roam_ic_mou_9',
 'roam_og_mou_6',
 'roam_og_mou_7',
 'roam_og_mou_8',
 'roam_og_mou_9',
 'loc_og_t2t_mou_6',
 'loc_og_t2t_mou_7',
 'loc_og_t2t_mou_8',
 'loc_og_t2t_mou_9',
 'loc_og_t2m_mou_6',
 'loc_og_t2m_mou_7',
 'loc_og_t2m_mou_8',
 'loc_og_t2m_mou_9',
 'loc_og_t2f_mou_6',
 'loc_og_t2f_mou_7',
 'loc_og_t2f_mou_8',
 'loc_og_t2f_mou_9',
 'loc_og_t2c_mou_6',
 'loc_og_t2c_mou_7',
 'loc_og_t2c_mou_8',
 'loc_og_t2c_mou_9',
 'loc_og_mou_6',
 'loc_og_mou_7',
 'loc_og_mou_8',
 'loc_og_mou_9',
 'std_og_t2t_mou_6',
 'std_og_t2t_mou_7',
 'std_og_t2t_mou_8',
 'std_og_t2t_mou_9',
 'std_og_t2m_mou_6',
 'std_og_t2m_mou_7',
 'std_og_t2m_mou_8',
 'std_og_t2m_mou_9',
 'std_og_t2f_mou_6',
 'std_og_t2f_mou_7',
 'std_og_t2f_mou_8',
 'std_og_t2f_mou_9',
 'std_og_t2c_mou_6',
 'std_og_t2c_mou_7',
 'std_og_t2c_mou_8',
 'std_og_t2c_mou_9',
 'std_og_mou_6',
 'std_og_mou_7',
 'std_og_mou_8',
 'std_og_mou_9',
 'isd_og_mou_6',
 'isd_og_mou_7',
 'isd_og_mou_8',
 'isd_og_mou_9',
 'spl_og_mou_6',
 'spl_og_mou_7',
 'spl_og_mou_8',
 'spl_og_mou_9',
 'og_others_6',
 'og_others_7',
 'og_others_8',
 'og_others_9',
 'total_og_mou_6',
 'total_og_mou_7',
 'total_og_mou_8',
 'total_og_mou_9',
 'loc_ic_t2t_mou_6',
 'loc_ic_t2t_mou_7',
 'loc_ic_t2t_mou_8',
 'loc_ic_t2t_mou_9',
 'loc_ic_t2m_mou_6',
 'loc_ic_t2m_mou_7',
 'loc_ic_t2m_mou_8',
 'loc_ic_t2m_mou_9',
 'loc_ic_t2f_mou_6',
 'loc_ic_t2f_mou_7',
 'loc_ic_t2f_mou_8',
 'loc_ic_t2f_mou_9',
 'loc_ic_mou_6',
 'loc_ic_mou_7',
 'loc_ic_mou_8',
 'loc_ic_mou_9',
 'std_ic_t2t_mou_6',
 'std_ic_t2t_mou_7',
 'std_ic_t2t_mou_8',
 'std_ic_t2t_mou_9',
 'std_ic_t2m_mou_6',
 'std_ic_t2m_mou_7',
 'std_ic_t2m_mou_8',
 'std_ic_t2m_mou_9',
 'std_ic_t2f_mou_6',
 'std_ic_t2f_mou_7',
 'std_ic_t2f_mou_8',
 'std_ic_t2f_mou_9',
 'std_ic_t2o_mou_6',
 'std_ic_t2o_mou_7',
 'std_ic_t2o_mou_8',
 'std_ic_t2o_mou_9',
 'std_ic_mou_6',
 'std_ic_mou_7',
 'std_ic_mou_8',
 'std_ic_mou_9',
 'total_ic_mou_6',
 'total_ic_mou_7',
 'total_ic_mou_8',
 'total_ic_mou_9',
 'spl_ic_mou_6',
 'spl_ic_mou_7',
 'spl_ic_mou_8',
 'spl_ic_mou_9',
 'isd_ic_mou_6',
 'isd_ic_mou_7',
 'isd_ic_mou_8',
 'isd_ic_mou_9',
 'ic_others_6',
 'ic_others_7',
 'ic_others_8',
 'ic_others_9',
 'total_rech_num_6',
 'total_rech_num_7',
 'total_rech_num_8',
 'total_rech_num_9',
 'total_rech_amt_6',
 'total_rech_amt_7',
 'total_rech_amt_8',
 'total_rech_amt_9',
 'max_rech_amt_6',
 'max_rech_amt_7',
 'max_rech_amt_8',
 'max_rech_amt_9',
 'last_day_rch_amt_6',
 'last_day_rch_amt_7',
 'last_day_rch_amt_8',
 'last_day_rch_amt_9',
 'total_rech_data_6',
 'total_rech_data_7',
 'total_rech_data_8',
 'total_rech_data_9',
 'max_rech_data_6',
 'max_rech_data_7',
 'max_rech_data_8',
 'max_rech_data_9',
 'count_rech_2g_6',
 'count_rech_2g_7',
 'count_rech_2g_8',
 'count_rech_2g_9',
 'count_rech_3g_6',
 'count_rech_3g_7',
 'count_rech_3g_8',
 'count_rech_3g_9',
 'av_rech_amt_data_6',
 'av_rech_amt_data_7',
 'av_rech_amt_data_8',
 'av_rech_amt_data_9',
 'vol_2g_mb_6',
 'vol_2g_mb_7',
 'vol_2g_mb_8',
 'vol_2g_mb_9',
 'vol_3g_mb_6',
 'vol_3g_mb_7',
 'vol_3g_mb_8',
 'vol_3g_mb_9',
 'arpu_3g_6',
 'arpu_3g_7',
 'arpu_3g_8',
 'arpu_3g_9',
 'arpu_2g_6',
 'arpu_2g_7',
 'arpu_2g_8',
 'arpu_2g_9',
 'night_pck_user_6',
 'night_pck_user_7',
 'night_pck_user_8',
 'night_pck_user_9',
 'monthly_2g_6',
 'monthly_2g_7',
 'monthly_2g_8',
 'monthly_2g_9',
 'sachet_2g_6',
 'sachet_2g_7',
 'sachet_2g_8',
 'sachet_2g_9',
 'monthly_3g_6',
 'monthly_3g_7',
 'monthly_3g_8',
 'monthly_3g_9',
 'sachet_3g_6',
 'sachet_3g_7',
 'sachet_3g_8',
 'sachet_3g_9',
 'fb_user_6',
 'fb_user_7',
 'fb_user_8',
 'fb_user_9',
 'aon',
 'aug_vbc_3g',
 'jul_vbc_3g',
 'jun_vbc_3g',
 'sep_vbc_3g']

In [0]:
# when you look at the CSV file you will realise that the FB and night pack users are actually categorical columns
#coded 0 and 1 for yes and no, so lets seperate them as well
churn_cat = ['fb_user_6',
 'fb_user_7',
 'fb_user_8',
 'fb_user_9','night_pck_user_6',
 'night_pck_user_7',
 'night_pck_user_8',
 'night_pck_user_9']
#new num columns will be as following
churn_num = [col for col in churn_num if col not in churn_cat]

In [462]:
churn_num


Out[462]:
['loc_og_t2o_mou',
 'std_og_t2o_mou',
 'loc_ic_t2o_mou',
 'arpu_6',
 'arpu_7',
 'arpu_8',
 'arpu_9',
 'onnet_mou_6',
 'onnet_mou_7',
 'onnet_mou_8',
 'onnet_mou_9',
 'offnet_mou_6',
 'offnet_mou_7',
 'offnet_mou_8',
 'offnet_mou_9',
 'roam_ic_mou_6',
 'roam_ic_mou_7',
 'roam_ic_mou_8',
 'roam_ic_mou_9',
 'roam_og_mou_6',
 'roam_og_mou_7',
 'roam_og_mou_8',
 'roam_og_mou_9',
 'loc_og_t2t_mou_6',
 'loc_og_t2t_mou_7',
 'loc_og_t2t_mou_8',
 'loc_og_t2t_mou_9',
 'loc_og_t2m_mou_6',
 'loc_og_t2m_mou_7',
 'loc_og_t2m_mou_8',
 'loc_og_t2m_mou_9',
 'loc_og_t2f_mou_6',
 'loc_og_t2f_mou_7',
 'loc_og_t2f_mou_8',
 'loc_og_t2f_mou_9',
 'loc_og_t2c_mou_6',
 'loc_og_t2c_mou_7',
 'loc_og_t2c_mou_8',
 'loc_og_t2c_mou_9',
 'loc_og_mou_6',
 'loc_og_mou_7',
 'loc_og_mou_8',
 'loc_og_mou_9',
 'std_og_t2t_mou_6',
 'std_og_t2t_mou_7',
 'std_og_t2t_mou_8',
 'std_og_t2t_mou_9',
 'std_og_t2m_mou_6',
 'std_og_t2m_mou_7',
 'std_og_t2m_mou_8',
 'std_og_t2m_mou_9',
 'std_og_t2f_mou_6',
 'std_og_t2f_mou_7',
 'std_og_t2f_mou_8',
 'std_og_t2f_mou_9',
 'std_og_t2c_mou_6',
 'std_og_t2c_mou_7',
 'std_og_t2c_mou_8',
 'std_og_t2c_mou_9',
 'std_og_mou_6',
 'std_og_mou_7',
 'std_og_mou_8',
 'std_og_mou_9',
 'isd_og_mou_6',
 'isd_og_mou_7',
 'isd_og_mou_8',
 'isd_og_mou_9',
 'spl_og_mou_6',
 'spl_og_mou_7',
 'spl_og_mou_8',
 'spl_og_mou_9',
 'og_others_6',
 'og_others_7',
 'og_others_8',
 'og_others_9',
 'total_og_mou_6',
 'total_og_mou_7',
 'total_og_mou_8',
 'total_og_mou_9',
 'loc_ic_t2t_mou_6',
 'loc_ic_t2t_mou_7',
 'loc_ic_t2t_mou_8',
 'loc_ic_t2t_mou_9',
 'loc_ic_t2m_mou_6',
 'loc_ic_t2m_mou_7',
 'loc_ic_t2m_mou_8',
 'loc_ic_t2m_mou_9',
 'loc_ic_t2f_mou_6',
 'loc_ic_t2f_mou_7',
 'loc_ic_t2f_mou_8',
 'loc_ic_t2f_mou_9',
 'loc_ic_mou_6',
 'loc_ic_mou_7',
 'loc_ic_mou_8',
 'loc_ic_mou_9',
 'std_ic_t2t_mou_6',
 'std_ic_t2t_mou_7',
 'std_ic_t2t_mou_8',
 'std_ic_t2t_mou_9',
 'std_ic_t2m_mou_6',
 'std_ic_t2m_mou_7',
 'std_ic_t2m_mou_8',
 'std_ic_t2m_mou_9',
 'std_ic_t2f_mou_6',
 'std_ic_t2f_mou_7',
 'std_ic_t2f_mou_8',
 'std_ic_t2f_mou_9',
 'std_ic_t2o_mou_6',
 'std_ic_t2o_mou_7',
 'std_ic_t2o_mou_8',
 'std_ic_t2o_mou_9',
 'std_ic_mou_6',
 'std_ic_mou_7',
 'std_ic_mou_8',
 'std_ic_mou_9',
 'total_ic_mou_6',
 'total_ic_mou_7',
 'total_ic_mou_8',
 'total_ic_mou_9',
 'spl_ic_mou_6',
 'spl_ic_mou_7',
 'spl_ic_mou_8',
 'spl_ic_mou_9',
 'isd_ic_mou_6',
 'isd_ic_mou_7',
 'isd_ic_mou_8',
 'isd_ic_mou_9',
 'ic_others_6',
 'ic_others_7',
 'ic_others_8',
 'ic_others_9',
 'total_rech_num_6',
 'total_rech_num_7',
 'total_rech_num_8',
 'total_rech_num_9',
 'total_rech_amt_6',
 'total_rech_amt_7',
 'total_rech_amt_8',
 'total_rech_amt_9',
 'max_rech_amt_6',
 'max_rech_amt_7',
 'max_rech_amt_8',
 'max_rech_amt_9',
 'last_day_rch_amt_6',
 'last_day_rch_amt_7',
 'last_day_rch_amt_8',
 'last_day_rch_amt_9',
 'total_rech_data_6',
 'total_rech_data_7',
 'total_rech_data_8',
 'total_rech_data_9',
 'max_rech_data_6',
 'max_rech_data_7',
 'max_rech_data_8',
 'max_rech_data_9',
 'count_rech_2g_6',
 'count_rech_2g_7',
 'count_rech_2g_8',
 'count_rech_2g_9',
 'count_rech_3g_6',
 'count_rech_3g_7',
 'count_rech_3g_8',
 'count_rech_3g_9',
 'av_rech_amt_data_6',
 'av_rech_amt_data_7',
 'av_rech_amt_data_8',
 'av_rech_amt_data_9',
 'vol_2g_mb_6',
 'vol_2g_mb_7',
 'vol_2g_mb_8',
 'vol_2g_mb_9',
 'vol_3g_mb_6',
 'vol_3g_mb_7',
 'vol_3g_mb_8',
 'vol_3g_mb_9',
 'arpu_3g_6',
 'arpu_3g_7',
 'arpu_3g_8',
 'arpu_3g_9',
 'arpu_2g_6',
 'arpu_2g_7',
 'arpu_2g_8',
 'arpu_2g_9',
 'monthly_2g_6',
 'monthly_2g_7',
 'monthly_2g_8',
 'monthly_2g_9',
 'sachet_2g_6',
 'sachet_2g_7',
 'sachet_2g_8',
 'sachet_2g_9',
 'monthly_3g_6',
 'monthly_3g_7',
 'monthly_3g_8',
 'monthly_3g_9',
 'sachet_3g_6',
 'sachet_3g_7',
 'sachet_3g_8',
 'sachet_3g_9',
 'aon',
 'aug_vbc_3g',
 'jul_vbc_3g',
 'jun_vbc_3g',
 'sep_vbc_3g']

In [463]:
round(churn.isnull().sum()/len(churn.index),4)*100


Out[463]:
mobile_number                0.00
circle_id                    0.00
loc_og_t2o_mou               1.02
std_og_t2o_mou               1.02
loc_ic_t2o_mou               1.02
last_date_of_month_6         0.00
last_date_of_month_7         0.60
last_date_of_month_8         1.10
last_date_of_month_9         1.66
arpu_6                       0.00
arpu_7                       0.00
arpu_8                       0.00
arpu_9                       0.00
onnet_mou_6                  3.94
onnet_mou_7                  3.86
onnet_mou_8                  5.38
onnet_mou_9                  7.75
offnet_mou_6                 3.94
offnet_mou_7                 3.86
offnet_mou_8                 5.38
offnet_mou_9                 7.75
roam_ic_mou_6                3.94
roam_ic_mou_7                3.86
roam_ic_mou_8                5.38
roam_ic_mou_9                7.75
roam_og_mou_6                3.94
roam_og_mou_7                3.86
roam_og_mou_8                5.38
roam_og_mou_9                7.75
loc_og_t2t_mou_6             3.94
loc_og_t2t_mou_7             3.86
loc_og_t2t_mou_8             5.38
loc_og_t2t_mou_9             7.75
loc_og_t2m_mou_6             3.94
loc_og_t2m_mou_7             3.86
loc_og_t2m_mou_8             5.38
loc_og_t2m_mou_9             7.75
loc_og_t2f_mou_6             3.94
loc_og_t2f_mou_7             3.86
loc_og_t2f_mou_8             5.38
loc_og_t2f_mou_9             7.75
loc_og_t2c_mou_6             3.94
loc_og_t2c_mou_7             3.86
loc_og_t2c_mou_8             5.38
loc_og_t2c_mou_9             7.75
loc_og_mou_6                 3.94
loc_og_mou_7                 3.86
loc_og_mou_8                 5.38
loc_og_mou_9                 7.75
std_og_t2t_mou_6             3.94
std_og_t2t_mou_7             3.86
std_og_t2t_mou_8             5.38
std_og_t2t_mou_9             7.75
std_og_t2m_mou_6             3.94
std_og_t2m_mou_7             3.86
std_og_t2m_mou_8             5.38
std_og_t2m_mou_9             7.75
std_og_t2f_mou_6             3.94
std_og_t2f_mou_7             3.86
std_og_t2f_mou_8             5.38
std_og_t2f_mou_9             7.75
std_og_t2c_mou_6             3.94
std_og_t2c_mou_7             3.86
std_og_t2c_mou_8             5.38
std_og_t2c_mou_9             7.75
std_og_mou_6                 3.94
std_og_mou_7                 3.86
std_og_mou_8                 5.38
std_og_mou_9                 7.75
isd_og_mou_6                 3.94
isd_og_mou_7                 3.86
isd_og_mou_8                 5.38
isd_og_mou_9                 7.75
spl_og_mou_6                 3.94
spl_og_mou_7                 3.86
spl_og_mou_8                 5.38
spl_og_mou_9                 7.75
og_others_6                  3.94
og_others_7                  3.86
og_others_8                  5.38
og_others_9                  7.75
total_og_mou_6               0.00
total_og_mou_7               0.00
total_og_mou_8               0.00
total_og_mou_9               0.00
loc_ic_t2t_mou_6             3.94
loc_ic_t2t_mou_7             3.86
loc_ic_t2t_mou_8             5.38
loc_ic_t2t_mou_9             7.75
loc_ic_t2m_mou_6             3.94
loc_ic_t2m_mou_7             3.86
loc_ic_t2m_mou_8             5.38
loc_ic_t2m_mou_9             7.75
loc_ic_t2f_mou_6             3.94
loc_ic_t2f_mou_7             3.86
loc_ic_t2f_mou_8             5.38
loc_ic_t2f_mou_9             7.75
loc_ic_mou_6                 3.94
loc_ic_mou_7                 3.86
loc_ic_mou_8                 5.38
loc_ic_mou_9                 7.75
std_ic_t2t_mou_6             3.94
std_ic_t2t_mou_7             3.86
std_ic_t2t_mou_8             5.38
std_ic_t2t_mou_9             7.75
std_ic_t2m_mou_6             3.94
std_ic_t2m_mou_7             3.86
std_ic_t2m_mou_8             5.38
std_ic_t2m_mou_9             7.75
std_ic_t2f_mou_6             3.94
std_ic_t2f_mou_7             3.86
std_ic_t2f_mou_8             5.38
std_ic_t2f_mou_9             7.75
std_ic_t2o_mou_6             3.94
std_ic_t2o_mou_7             3.86
std_ic_t2o_mou_8             5.38
std_ic_t2o_mou_9             7.75
std_ic_mou_6                 3.94
std_ic_mou_7                 3.86
std_ic_mou_8                 5.38
std_ic_mou_9                 7.75
total_ic_mou_6               0.00
total_ic_mou_7               0.00
total_ic_mou_8               0.00
total_ic_mou_9               0.00
spl_ic_mou_6                 3.94
spl_ic_mou_7                 3.86
spl_ic_mou_8                 5.38
spl_ic_mou_9                 7.75
isd_ic_mou_6                 3.94
isd_ic_mou_7                 3.86
isd_ic_mou_8                 5.38
isd_ic_mou_9                 7.75
ic_others_6                  3.94
ic_others_7                  3.86
ic_others_8                  5.38
ic_others_9                  7.75
total_rech_num_6             0.00
total_rech_num_7             0.00
total_rech_num_8             0.00
total_rech_num_9             0.00
total_rech_amt_6             0.00
total_rech_amt_7             0.00
total_rech_amt_8             0.00
total_rech_amt_9             0.00
max_rech_amt_6               0.00
max_rech_amt_7               0.00
max_rech_amt_8               0.00
max_rech_amt_9               0.00
date_of_last_rech_6          1.61
date_of_last_rech_7          1.77
date_of_last_rech_8          3.62
date_of_last_rech_9          4.76
last_day_rch_amt_6           0.00
last_day_rch_amt_7           0.00
last_day_rch_amt_8           0.00
last_day_rch_amt_9           0.00
date_of_last_rech_data_6    74.85
date_of_last_rech_data_7    74.43
date_of_last_rech_data_8    73.66
date_of_last_rech_data_9    74.08
total_rech_data_6           74.85
total_rech_data_7           74.43
total_rech_data_8           73.66
total_rech_data_9           74.08
max_rech_data_6             74.85
max_rech_data_7             74.43
max_rech_data_8             73.66
max_rech_data_9             74.08
count_rech_2g_6             74.85
count_rech_2g_7             74.43
count_rech_2g_8             73.66
count_rech_2g_9             74.08
count_rech_3g_6             74.85
count_rech_3g_7             74.43
count_rech_3g_8             73.66
count_rech_3g_9             74.08
av_rech_amt_data_6          74.85
av_rech_amt_data_7          74.43
av_rech_amt_data_8          73.66
av_rech_amt_data_9          74.08
vol_2g_mb_6                  0.00
vol_2g_mb_7                  0.00
vol_2g_mb_8                  0.00
vol_2g_mb_9                  0.00
vol_3g_mb_6                  0.00
vol_3g_mb_7                  0.00
vol_3g_mb_8                  0.00
vol_3g_mb_9                  0.00
arpu_3g_6                   74.85
arpu_3g_7                   74.43
arpu_3g_8                   73.66
arpu_3g_9                   74.08
arpu_2g_6                   74.85
arpu_2g_7                   74.43
arpu_2g_8                   73.66
arpu_2g_9                   74.08
night_pck_user_6            74.85
night_pck_user_7            74.43
night_pck_user_8            73.66
night_pck_user_9            74.08
monthly_2g_6                 0.00
monthly_2g_7                 0.00
monthly_2g_8                 0.00
monthly_2g_9                 0.00
sachet_2g_6                  0.00
sachet_2g_7                  0.00
sachet_2g_8                  0.00
sachet_2g_9                  0.00
monthly_3g_6                 0.00
monthly_3g_7                 0.00
monthly_3g_8                 0.00
monthly_3g_9                 0.00
sachet_3g_6                  0.00
sachet_3g_7                  0.00
sachet_3g_8                  0.00
sachet_3g_9                  0.00
fb_user_6                   74.85
fb_user_7                   74.43
fb_user_8                   73.66
fb_user_9                   74.08
aon                          0.00
aug_vbc_3g                   0.00
jul_vbc_3g                   0.00
jun_vbc_3g                   0.00
sep_vbc_3g                   0.00
dtype: float64

Missing value treatement


In [0]:
# categorical columns
churn[churn_cat] = churn[churn_cat].apply(lambda x: x.fillna(-1))

In [465]:
round(churn.isnull().sum()/len(churn.index),4)*100


Out[465]:
mobile_number                0.00
circle_id                    0.00
loc_og_t2o_mou               1.02
std_og_t2o_mou               1.02
loc_ic_t2o_mou               1.02
last_date_of_month_6         0.00
last_date_of_month_7         0.60
last_date_of_month_8         1.10
last_date_of_month_9         1.66
arpu_6                       0.00
arpu_7                       0.00
arpu_8                       0.00
arpu_9                       0.00
onnet_mou_6                  3.94
onnet_mou_7                  3.86
onnet_mou_8                  5.38
onnet_mou_9                  7.75
offnet_mou_6                 3.94
offnet_mou_7                 3.86
offnet_mou_8                 5.38
offnet_mou_9                 7.75
roam_ic_mou_6                3.94
roam_ic_mou_7                3.86
roam_ic_mou_8                5.38
roam_ic_mou_9                7.75
roam_og_mou_6                3.94
roam_og_mou_7                3.86
roam_og_mou_8                5.38
roam_og_mou_9                7.75
loc_og_t2t_mou_6             3.94
loc_og_t2t_mou_7             3.86
loc_og_t2t_mou_8             5.38
loc_og_t2t_mou_9             7.75
loc_og_t2m_mou_6             3.94
loc_og_t2m_mou_7             3.86
loc_og_t2m_mou_8             5.38
loc_og_t2m_mou_9             7.75
loc_og_t2f_mou_6             3.94
loc_og_t2f_mou_7             3.86
loc_og_t2f_mou_8             5.38
loc_og_t2f_mou_9             7.75
loc_og_t2c_mou_6             3.94
loc_og_t2c_mou_7             3.86
loc_og_t2c_mou_8             5.38
loc_og_t2c_mou_9             7.75
loc_og_mou_6                 3.94
loc_og_mou_7                 3.86
loc_og_mou_8                 5.38
loc_og_mou_9                 7.75
std_og_t2t_mou_6             3.94
std_og_t2t_mou_7             3.86
std_og_t2t_mou_8             5.38
std_og_t2t_mou_9             7.75
std_og_t2m_mou_6             3.94
std_og_t2m_mou_7             3.86
std_og_t2m_mou_8             5.38
std_og_t2m_mou_9             7.75
std_og_t2f_mou_6             3.94
std_og_t2f_mou_7             3.86
std_og_t2f_mou_8             5.38
std_og_t2f_mou_9             7.75
std_og_t2c_mou_6             3.94
std_og_t2c_mou_7             3.86
std_og_t2c_mou_8             5.38
std_og_t2c_mou_9             7.75
std_og_mou_6                 3.94
std_og_mou_7                 3.86
std_og_mou_8                 5.38
std_og_mou_9                 7.75
isd_og_mou_6                 3.94
isd_og_mou_7                 3.86
isd_og_mou_8                 5.38
isd_og_mou_9                 7.75
spl_og_mou_6                 3.94
spl_og_mou_7                 3.86
spl_og_mou_8                 5.38
spl_og_mou_9                 7.75
og_others_6                  3.94
og_others_7                  3.86
og_others_8                  5.38
og_others_9                  7.75
total_og_mou_6               0.00
total_og_mou_7               0.00
total_og_mou_8               0.00
total_og_mou_9               0.00
loc_ic_t2t_mou_6             3.94
loc_ic_t2t_mou_7             3.86
loc_ic_t2t_mou_8             5.38
loc_ic_t2t_mou_9             7.75
loc_ic_t2m_mou_6             3.94
loc_ic_t2m_mou_7             3.86
loc_ic_t2m_mou_8             5.38
loc_ic_t2m_mou_9             7.75
loc_ic_t2f_mou_6             3.94
loc_ic_t2f_mou_7             3.86
loc_ic_t2f_mou_8             5.38
loc_ic_t2f_mou_9             7.75
loc_ic_mou_6                 3.94
loc_ic_mou_7                 3.86
loc_ic_mou_8                 5.38
loc_ic_mou_9                 7.75
std_ic_t2t_mou_6             3.94
std_ic_t2t_mou_7             3.86
std_ic_t2t_mou_8             5.38
std_ic_t2t_mou_9             7.75
std_ic_t2m_mou_6             3.94
std_ic_t2m_mou_7             3.86
std_ic_t2m_mou_8             5.38
std_ic_t2m_mou_9             7.75
std_ic_t2f_mou_6             3.94
std_ic_t2f_mou_7             3.86
std_ic_t2f_mou_8             5.38
std_ic_t2f_mou_9             7.75
std_ic_t2o_mou_6             3.94
std_ic_t2o_mou_7             3.86
std_ic_t2o_mou_8             5.38
std_ic_t2o_mou_9             7.75
std_ic_mou_6                 3.94
std_ic_mou_7                 3.86
std_ic_mou_8                 5.38
std_ic_mou_9                 7.75
total_ic_mou_6               0.00
total_ic_mou_7               0.00
total_ic_mou_8               0.00
total_ic_mou_9               0.00
spl_ic_mou_6                 3.94
spl_ic_mou_7                 3.86
spl_ic_mou_8                 5.38
spl_ic_mou_9                 7.75
isd_ic_mou_6                 3.94
isd_ic_mou_7                 3.86
isd_ic_mou_8                 5.38
isd_ic_mou_9                 7.75
ic_others_6                  3.94
ic_others_7                  3.86
ic_others_8                  5.38
ic_others_9                  7.75
total_rech_num_6             0.00
total_rech_num_7             0.00
total_rech_num_8             0.00
total_rech_num_9             0.00
total_rech_amt_6             0.00
total_rech_amt_7             0.00
total_rech_amt_8             0.00
total_rech_amt_9             0.00
max_rech_amt_6               0.00
max_rech_amt_7               0.00
max_rech_amt_8               0.00
max_rech_amt_9               0.00
date_of_last_rech_6          1.61
date_of_last_rech_7          1.77
date_of_last_rech_8          3.62
date_of_last_rech_9          4.76
last_day_rch_amt_6           0.00
last_day_rch_amt_7           0.00
last_day_rch_amt_8           0.00
last_day_rch_amt_9           0.00
date_of_last_rech_data_6    74.85
date_of_last_rech_data_7    74.43
date_of_last_rech_data_8    73.66
date_of_last_rech_data_9    74.08
total_rech_data_6           74.85
total_rech_data_7           74.43
total_rech_data_8           73.66
total_rech_data_9           74.08
max_rech_data_6             74.85
max_rech_data_7             74.43
max_rech_data_8             73.66
max_rech_data_9             74.08
count_rech_2g_6             74.85
count_rech_2g_7             74.43
count_rech_2g_8             73.66
count_rech_2g_9             74.08
count_rech_3g_6             74.85
count_rech_3g_7             74.43
count_rech_3g_8             73.66
count_rech_3g_9             74.08
av_rech_amt_data_6          74.85
av_rech_amt_data_7          74.43
av_rech_amt_data_8          73.66
av_rech_amt_data_9          74.08
vol_2g_mb_6                  0.00
vol_2g_mb_7                  0.00
vol_2g_mb_8                  0.00
vol_2g_mb_9                  0.00
vol_3g_mb_6                  0.00
vol_3g_mb_7                  0.00
vol_3g_mb_8                  0.00
vol_3g_mb_9                  0.00
arpu_3g_6                   74.85
arpu_3g_7                   74.43
arpu_3g_8                   73.66
arpu_3g_9                   74.08
arpu_2g_6                   74.85
arpu_2g_7                   74.43
arpu_2g_8                   73.66
arpu_2g_9                   74.08
night_pck_user_6             0.00
night_pck_user_7             0.00
night_pck_user_8             0.00
night_pck_user_9             0.00
monthly_2g_6                 0.00
monthly_2g_7                 0.00
monthly_2g_8                 0.00
monthly_2g_9                 0.00
sachet_2g_6                  0.00
sachet_2g_7                  0.00
sachet_2g_8                  0.00
sachet_2g_9                  0.00
monthly_3g_6                 0.00
monthly_3g_7                 0.00
monthly_3g_8                 0.00
monthly_3g_9                 0.00
sachet_3g_6                  0.00
sachet_3g_7                  0.00
sachet_3g_8                  0.00
sachet_3g_9                  0.00
fb_user_6                    0.00
fb_user_7                    0.00
fb_user_8                    0.00
fb_user_9                    0.00
aon                          0.00
aug_vbc_3g                   0.00
jul_vbc_3g                   0.00
jun_vbc_3g                   0.00
sep_vbc_3g                   0.00
dtype: float64

In [466]:
churn.head(5)


Out[466]:
mobile_number circle_id loc_og_t2o_mou std_og_t2o_mou loc_ic_t2o_mou last_date_of_month_6 last_date_of_month_7 last_date_of_month_8 last_date_of_month_9 arpu_6 arpu_7 arpu_8 arpu_9 onnet_mou_6 onnet_mou_7 onnet_mou_8 onnet_mou_9 offnet_mou_6 offnet_mou_7 offnet_mou_8 offnet_mou_9 roam_ic_mou_6 roam_ic_mou_7 roam_ic_mou_8 roam_ic_mou_9 roam_og_mou_6 roam_og_mou_7 roam_og_mou_8 roam_og_mou_9 loc_og_t2t_mou_6 loc_og_t2t_mou_7 loc_og_t2t_mou_8 loc_og_t2t_mou_9 loc_og_t2m_mou_6 loc_og_t2m_mou_7 loc_og_t2m_mou_8 loc_og_t2m_mou_9 loc_og_t2f_mou_6 loc_og_t2f_mou_7 loc_og_t2f_mou_8 loc_og_t2f_mou_9 loc_og_t2c_mou_6 loc_og_t2c_mou_7 loc_og_t2c_mou_8 loc_og_t2c_mou_9 loc_og_mou_6 loc_og_mou_7 loc_og_mou_8 loc_og_mou_9 std_og_t2t_mou_6 std_og_t2t_mou_7 std_og_t2t_mou_8 std_og_t2t_mou_9 std_og_t2m_mou_6 std_og_t2m_mou_7 std_og_t2m_mou_8 std_og_t2m_mou_9 std_og_t2f_mou_6 std_og_t2f_mou_7 std_og_t2f_mou_8 std_og_t2f_mou_9 std_og_t2c_mou_6 std_og_t2c_mou_7 std_og_t2c_mou_8 std_og_t2c_mou_9 std_og_mou_6 std_og_mou_7 std_og_mou_8 std_og_mou_9 isd_og_mou_6 isd_og_mou_7 isd_og_mou_8 isd_og_mou_9 spl_og_mou_6 spl_og_mou_7 spl_og_mou_8 spl_og_mou_9 og_others_6 og_others_7 og_others_8 og_others_9 total_og_mou_6 total_og_mou_7 total_og_mou_8 total_og_mou_9 loc_ic_t2t_mou_6 loc_ic_t2t_mou_7 loc_ic_t2t_mou_8 loc_ic_t2t_mou_9 loc_ic_t2m_mou_6 loc_ic_t2m_mou_7 loc_ic_t2m_mou_8 loc_ic_t2m_mou_9 loc_ic_t2f_mou_6 loc_ic_t2f_mou_7 loc_ic_t2f_mou_8 loc_ic_t2f_mou_9 loc_ic_mou_6 loc_ic_mou_7 loc_ic_mou_8 loc_ic_mou_9 std_ic_t2t_mou_6 std_ic_t2t_mou_7 std_ic_t2t_mou_8 std_ic_t2t_mou_9 std_ic_t2m_mou_6 std_ic_t2m_mou_7 std_ic_t2m_mou_8 std_ic_t2m_mou_9 std_ic_t2f_mou_6 std_ic_t2f_mou_7 std_ic_t2f_mou_8 std_ic_t2f_mou_9 std_ic_t2o_mou_6 std_ic_t2o_mou_7 std_ic_t2o_mou_8 std_ic_t2o_mou_9 std_ic_mou_6 std_ic_mou_7 std_ic_mou_8 std_ic_mou_9 total_ic_mou_6 total_ic_mou_7 total_ic_mou_8 total_ic_mou_9 spl_ic_mou_6 spl_ic_mou_7 spl_ic_mou_8 spl_ic_mou_9 isd_ic_mou_6 isd_ic_mou_7 isd_ic_mou_8 isd_ic_mou_9 ic_others_6 ic_others_7 ic_others_8 ic_others_9 total_rech_num_6 total_rech_num_7 total_rech_num_8 total_rech_num_9 total_rech_amt_6 total_rech_amt_7 total_rech_amt_8 total_rech_amt_9 max_rech_amt_6 max_rech_amt_7 max_rech_amt_8 max_rech_amt_9 date_of_last_rech_6 date_of_last_rech_7 date_of_last_rech_8 date_of_last_rech_9 last_day_rch_amt_6 last_day_rch_amt_7 last_day_rch_amt_8 last_day_rch_amt_9 date_of_last_rech_data_6 date_of_last_rech_data_7 date_of_last_rech_data_8 date_of_last_rech_data_9 total_rech_data_6 total_rech_data_7 total_rech_data_8 total_rech_data_9 max_rech_data_6 max_rech_data_7 max_rech_data_8 max_rech_data_9 count_rech_2g_6 count_rech_2g_7 count_rech_2g_8 count_rech_2g_9 count_rech_3g_6 count_rech_3g_7 count_rech_3g_8 count_rech_3g_9 av_rech_amt_data_6 av_rech_amt_data_7 av_rech_amt_data_8 av_rech_amt_data_9 vol_2g_mb_6 vol_2g_mb_7 vol_2g_mb_8 vol_2g_mb_9 vol_3g_mb_6 vol_3g_mb_7 vol_3g_mb_8 vol_3g_mb_9 arpu_3g_6 arpu_3g_7 arpu_3g_8 arpu_3g_9 arpu_2g_6 arpu_2g_7 arpu_2g_8 arpu_2g_9 night_pck_user_6 night_pck_user_7 night_pck_user_8 night_pck_user_9 monthly_2g_6 monthly_2g_7 monthly_2g_8 monthly_2g_9 sachet_2g_6 sachet_2g_7 sachet_2g_8 sachet_2g_9 monthly_3g_6 monthly_3g_7 monthly_3g_8 monthly_3g_9 sachet_3g_6 sachet_3g_7 sachet_3g_8 sachet_3g_9 fb_user_6 fb_user_7 fb_user_8 fb_user_9 aon aug_vbc_3g jul_vbc_3g jun_vbc_3g sep_vbc_3g
0 7000842753 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 197.385 214.816 213.803 21.100 NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.0 NaN NaN NaN 0.00 NaN NaN NaN 0.0 NaN NaN NaN 0.00 NaN NaN NaN 0.0 NaN 0.00 0.00 0.00 0.00 NaN NaN 0.16 NaN NaN NaN 4.13 NaN NaN NaN 1.15 NaN NaN NaN 5.44 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.0 NaN NaN NaN 0.00 NaN 0.00 0.00 5.44 0.00 NaN NaN 0.0 NaN NaN NaN 0.0 NaN NaN NaN 0.0 NaN 4 3 2 6 362 252 252 0 252 252 252 0 6/21/2014 7/16/2014 8/8/2014 9/28/2014 252 252 252 0 6/21/2014 7/16/2014 8/8/2014 NaN 1.0 1.0 1.0 NaN 252.0 252.0 252.0 NaN 0.0 0.0 0.0 NaN 1.0 1.0 1.0 NaN 252.0 252.0 252.0 NaN 30.13 1.32 5.75 0.0 83.57 150.76 109.61 0.00 212.17 212.17 212.17 NaN 212.17 212.17 212.17 NaN 0.0 0.0 0.0 -1.0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1.0 1.0 1.0 -1.0 968 30.4 0.0 101.20 3.58
1 7001865778 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 34.047 355.074 268.321 86.285 24.11 78.68 7.68 18.34 15.74 99.84 304.76 53.76 0.0 0.00 0.00 0.00 0.0 0.00 0.00 0.00 23.88 74.56 7.68 18.34 11.51 75.94 291.86 53.76 0.00 0.00 0.00 0.00 0.0 2.91 0.00 0.00 35.39 150.51 299.54 72.11 0.23 4.11 0.00 0.00 0.00 0.46 0.13 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 0.0 0.23 4.58 0.13 0.00 0.0 0.0 0.0 0.0 4.68 23.43 12.76 0.00 0.00 0.0 0.0 0.0 40.31 178.53 312.44 72.11 1.61 29.91 29.23 116.09 17.48 65.38 375.58 56.93 0.00 8.93 3.61 0.00 19.09 104.23 408.43 173.03 0.00 0.00 2.35 0.00 5.90 0.00 12.49 15.01 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 5.90 0.00 14.84 15.01 26.83 104.23 423.28 188.04 0.00 0.0 0.0 0.00 1.83 0.00 0.0 0.00 0.00 0.00 0.0 0.00 4 9 11 5 74 384 283 121 44 154 65 50 6/29/2014 7/31/2014 8/28/2014 9/30/2014 44 23 30 0 NaN 7/25/2014 8/10/2014 NaN NaN 1.0 2.0 NaN NaN 154.0 25.0 NaN NaN 1.0 2.0 NaN NaN 0.0 0.0 NaN NaN 154.0 50.0 NaN 0.00 108.07 365.47 0.0 0.00 0.00 0.00 0.00 NaN 0.00 0.00 NaN NaN 28.61 7.60 NaN -1.0 0.0 0.0 -1.0 0 1 0 0 0 0 2 0 0 0 0 0 0 0 0 0 -1.0 1.0 1.0 -1.0 1006 0.0 0.0 0.00 0.00
2 7001625959 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 167.690 189.058 210.226 290.714 11.54 55.24 37.26 74.81 143.33 220.59 208.36 118.91 0.0 0.00 0.00 38.49 0.0 0.00 0.00 70.94 7.19 28.74 13.58 14.39 29.34 16.86 38.46 28.16 24.11 21.79 15.61 22.24 0.0 135.54 45.76 0.48 60.66 67.41 67.66 64.81 4.34 26.49 22.58 8.76 41.81 67.41 75.53 9.28 1.48 14.76 22.83 0.0 0.0 0.0 0.0 0.0 47.64 108.68 120.94 18.04 0.0 0.0 0.0 0.0 46.56 236.84 96.84 42.08 0.45 0.0 0.0 0.0 155.33 412.94 285.46 124.94 115.69 71.11 67.46 148.23 14.38 15.44 38.89 38.98 99.48 122.29 49.63 158.19 229.56 208.86 155.99 345.41 72.41 71.29 28.69 49.44 45.18 177.01 167.09 118.18 21.73 58.34 43.23 3.86 0.0 0.0 0.0 0.0 139.33 306.66 239.03 171.49 370.04 519.53 395.03 517.74 0.21 0.0 0.0 0.45 0.00 0.85 0.0 0.01 0.93 3.14 0.0 0.36 5 4 2 7 168 315 116 358 86 200 86 100 6/17/2014 7/24/2014 8/14/2014 9/29/2014 0 200 86 0 NaN NaN NaN 9/17/2014 NaN NaN NaN 1.0 NaN NaN NaN 46.0 NaN NaN NaN 1.0 NaN NaN NaN 0.0 NaN NaN NaN 46.0 0.00 0.00 0.00 0.0 0.00 0.00 0.00 8.42 NaN NaN NaN 2.84 NaN NaN NaN 0.0 -1.0 -1.0 -1.0 0.0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 -1.0 -1.0 -1.0 1.0 1103 0.0 0.0 4.17 0.00
3 7001204172 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 221.338 251.102 508.054 389.500 99.91 54.39 310.98 241.71 123.31 109.01 71.68 113.54 0.0 54.86 44.38 0.00 0.0 28.09 39.04 0.00 73.68 34.81 10.61 15.49 107.43 83.21 22.46 65.46 1.91 0.65 4.91 2.06 0.0 0.00 0.00 0.00 183.03 118.68 37.99 83.03 26.23 14.89 289.58 226.21 2.99 1.73 6.53 9.99 0.00 0.00 0.00 0.0 0.0 0.0 0.0 0.0 29.23 16.63 296.11 236.21 0.0 0.0 0.0 0.0 10.96 0.00 18.09 43.29 0.00 0.0 0.0 0.0 223.23 135.31 352.21 362.54 62.08 19.98 8.04 41.73 113.96 64.51 20.28 52.86 57.43 27.09 19.84 65.59 233.48 111.59 48.18 160.19 43.48 66.44 0.00 129.84 1.33 38.56 4.94 13.98 1.18 0.00 0.00 0.00 0.0 0.0 0.0 0.0 45.99 105.01 4.94 143.83 280.08 216.61 53.13 305.38 0.59 0.0 0.0 0.55 0.00 0.00 0.0 0.00 0.00 0.00 0.0 0.80 10 11 18 14 230 310 601 410 60 50 50 50 6/28/2014 7/31/2014 8/31/2014 9/30/2014 30 50 50 30 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 NaN NaN NaN NaN NaN NaN NaN NaN -1.0 -1.0 -1.0 -1.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1.0 -1.0 -1.0 -1.0 2491 0.0 0.0 0.00 0.00
4 7000142493 109 0.0 0.0 0.0 6/30/2014 7/31/2014 8/31/2014 9/30/2014 261.636 309.876 238.174 163.426 50.31 149.44 83.89 58.78 76.96 91.88 124.26 45.81 0.0 0.00 0.00 0.00 0.0 0.00 0.00 0.00 50.31 149.44 83.89 58.78 67.64 91.88 124.26 37.89 0.00 0.00 0.00 1.93 0.0 0.00 0.00 0.00 117.96 241.33 208.16 98.61 0.00 0.00 0.00 0.00 9.31 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 0.0 9.31 0.00 0.00 0.00 0.0 0.0 0.0 0.0 0.00 0.00 0.00 5.98 0.00 0.0 0.0 0.0 127.28 241.33 208.16 104.59 105.68 88.49 233.81 154.56 106.84 109.54 104.13 48.24 1.50 0.00 0.00 0.00 214.03 198.04 337.94 202.81 0.00 0.00 0.86 2.31 1.93 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 1.93 0.25 0.86 2.31 216.44 198.29 338.81 205.31 0.00 0.0 0.0 0.18 0.00 0.00 0.0 0.00 0.48 0.00 0.0 0.00 5 6 3 4 196 350 287 200 56 110 110 50 6/26/2014 7/28/2014 8/9/2014 9/28/2014 50 110 110 50 6/4/2014 NaN NaN NaN 1.0 NaN NaN NaN 56.0 NaN NaN NaN 1.0 NaN NaN NaN 0.0 NaN NaN NaN 56.0 NaN NaN NaN 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00 NaN NaN NaN 0.00 NaN NaN NaN 0.0 -1.0 -1.0 -1.0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0.0 -1.0 -1.0 -1.0 1526 0.0 0.0 0.00 0.00

In [0]:
#we can impute the recharge columns such as total, avg and max with 0 as null valiue idicates no recharge
i = ["total_rech_data_6"
,"total_rech_data_7"
,"total_rech_data_8"
,"total_rech_data_9"
,"av_rech_amt_data_6"
,"av_rech_amt_data_7"
,"av_rech_amt_data_8"
,"av_rech_amt_data_9"
,"max_rech_data_6"
,"max_rech_data_7"
,"max_rech_data_8"
,"max_rech_data_9"]
churn[i]=churn[i].apply(lambda x: x.fillna(0))

In [468]:
churn[i].isna


Out[468]:
<bound method DataFrame.isna of        total_rech_data_6  total_rech_data_7  total_rech_data_8  total_rech_data_9  av_rech_amt_data_6  av_rech_amt_data_7  av_rech_amt_data_8  av_rech_amt_data_9  max_rech_data_6  max_rech_data_7  max_rech_data_8  max_rech_data_9
0                    1.0                1.0                1.0                0.0               252.0               252.0               252.0                 0.0            252.0            252.0            252.0              0.0
1                    0.0                1.0                2.0                0.0                 0.0               154.0                50.0                 0.0              0.0            154.0             25.0              0.0
2                    0.0                0.0                0.0                1.0                 0.0                 0.0                 0.0                46.0              0.0              0.0              0.0             46.0
3                    0.0                0.0                0.0                0.0                 0.0                 0.0                 0.0                 0.0              0.0              0.0              0.0              0.0
4                    1.0                0.0                0.0                0.0                56.0                 0.0                 0.0                 0.0             56.0              0.0              0.0              0.0
...                  ...                ...                ...                ...                 ...                 ...                 ...                 ...              ...              ...              ...              ...
99994                0.0                0.0                0.0                0.0                 0.0                 0.0                 0.0                 0.0              0.0              0.0              0.0              0.0
99995                0.0                0.0                0.0                0.0                 0.0                 0.0                 0.0                 0.0              0.0              0.0              0.0              0.0
99996                2.0                0.0                0.0                0.0                39.0                 0.0                 0.0                 0.0             25.0              0.0              0.0              0.0
99997                3.0                2.0                4.0                4.0               583.0               358.0               716.0               862.0            202.0            179.0            179.0            252.0
99998                1.0                0.0                0.0                0.0               154.0                 0.0                 0.0                 0.0            154.0              0.0              0.0              0.0

[99999 rows x 12 columns]>

In [469]:
round(churn.isnull().sum()/len(churn.index),4)*100 > 70


Out[469]:
mobile_number               False
circle_id                   False
loc_og_t2o_mou              False
std_og_t2o_mou              False
loc_ic_t2o_mou              False
last_date_of_month_6        False
last_date_of_month_7        False
last_date_of_month_8        False
last_date_of_month_9        False
arpu_6                      False
arpu_7                      False
arpu_8                      False
arpu_9                      False
onnet_mou_6                 False
onnet_mou_7                 False
onnet_mou_8                 False
onnet_mou_9                 False
offnet_mou_6                False
offnet_mou_7                False
offnet_mou_8                False
offnet_mou_9                False
roam_ic_mou_6               False
roam_ic_mou_7               False
roam_ic_mou_8               False
roam_ic_mou_9               False
roam_og_mou_6               False
roam_og_mou_7               False
roam_og_mou_8               False
roam_og_mou_9               False
loc_og_t2t_mou_6            False
loc_og_t2t_mou_7            False
loc_og_t2t_mou_8            False
loc_og_t2t_mou_9            False
loc_og_t2m_mou_6            False
loc_og_t2m_mou_7            False
loc_og_t2m_mou_8            False
loc_og_t2m_mou_9            False
loc_og_t2f_mou_6            False
loc_og_t2f_mou_7            False
loc_og_t2f_mou_8            False
loc_og_t2f_mou_9            False
loc_og_t2c_mou_6            False
loc_og_t2c_mou_7            False
loc_og_t2c_mou_8            False
loc_og_t2c_mou_9            False
loc_og_mou_6                False
loc_og_mou_7                False
loc_og_mou_8                False
loc_og_mou_9                False
std_og_t2t_mou_6            False
std_og_t2t_mou_7            False
std_og_t2t_mou_8            False
std_og_t2t_mou_9            False
std_og_t2m_mou_6            False
std_og_t2m_mou_7            False
std_og_t2m_mou_8            False
std_og_t2m_mou_9            False
std_og_t2f_mou_6            False
std_og_t2f_mou_7            False
std_og_t2f_mou_8            False
std_og_t2f_mou_9            False
std_og_t2c_mou_6            False
std_og_t2c_mou_7            False
std_og_t2c_mou_8            False
std_og_t2c_mou_9            False
std_og_mou_6                False
std_og_mou_7                False
std_og_mou_8                False
std_og_mou_9                False
isd_og_mou_6                False
isd_og_mou_7                False
isd_og_mou_8                False
isd_og_mou_9                False
spl_og_mou_6                False
spl_og_mou_7                False
spl_og_mou_8                False
spl_og_mou_9                False
og_others_6                 False
og_others_7                 False
og_others_8                 False
og_others_9                 False
total_og_mou_6              False
total_og_mou_7              False
total_og_mou_8              False
total_og_mou_9              False
loc_ic_t2t_mou_6            False
loc_ic_t2t_mou_7            False
loc_ic_t2t_mou_8            False
loc_ic_t2t_mou_9            False
loc_ic_t2m_mou_6            False
loc_ic_t2m_mou_7            False
loc_ic_t2m_mou_8            False
loc_ic_t2m_mou_9            False
loc_ic_t2f_mou_6            False
loc_ic_t2f_mou_7            False
loc_ic_t2f_mou_8            False
loc_ic_t2f_mou_9            False
loc_ic_mou_6                False
loc_ic_mou_7                False
loc_ic_mou_8                False
loc_ic_mou_9                False
std_ic_t2t_mou_6            False
std_ic_t2t_mou_7            False
std_ic_t2t_mou_8            False
std_ic_t2t_mou_9            False
std_ic_t2m_mou_6            False
std_ic_t2m_mou_7            False
std_ic_t2m_mou_8            False
std_ic_t2m_mou_9            False
std_ic_t2f_mou_6            False
std_ic_t2f_mou_7            False
std_ic_t2f_mou_8            False
std_ic_t2f_mou_9            False
std_ic_t2o_mou_6            False
std_ic_t2o_mou_7            False
std_ic_t2o_mou_8            False
std_ic_t2o_mou_9            False
std_ic_mou_6                False
std_ic_mou_7                False
std_ic_mou_8                False
std_ic_mou_9                False
total_ic_mou_6              False
total_ic_mou_7              False
total_ic_mou_8              False
total_ic_mou_9              False
spl_ic_mou_6                False
spl_ic_mou_7                False
spl_ic_mou_8                False
spl_ic_mou_9                False
isd_ic_mou_6                False
isd_ic_mou_7                False
isd_ic_mou_8                False
isd_ic_mou_9                False
ic_others_6                 False
ic_others_7                 False
ic_others_8                 False
ic_others_9                 False
total_rech_num_6            False
total_rech_num_7            False
total_rech_num_8            False
total_rech_num_9            False
total_rech_amt_6            False
total_rech_amt_7            False
total_rech_amt_8            False
total_rech_amt_9            False
max_rech_amt_6              False
max_rech_amt_7              False
max_rech_amt_8              False
max_rech_amt_9              False
date_of_last_rech_6         False
date_of_last_rech_7         False
date_of_last_rech_8         False
date_of_last_rech_9         False
last_day_rch_amt_6          False
last_day_rch_amt_7          False
last_day_rch_amt_8          False
last_day_rch_amt_9          False
date_of_last_rech_data_6     True
date_of_last_rech_data_7     True
date_of_last_rech_data_8     True
date_of_last_rech_data_9     True
total_rech_data_6           False
total_rech_data_7           False
total_rech_data_8           False
total_rech_data_9           False
max_rech_data_6             False
max_rech_data_7             False
max_rech_data_8             False
max_rech_data_9             False
count_rech_2g_6              True
count_rech_2g_7              True
count_rech_2g_8              True
count_rech_2g_9              True
count_rech_3g_6              True
count_rech_3g_7              True
count_rech_3g_8              True
count_rech_3g_9              True
av_rech_amt_data_6          False
av_rech_amt_data_7          False
av_rech_amt_data_8          False
av_rech_amt_data_9          False
vol_2g_mb_6                 False
vol_2g_mb_7                 False
vol_2g_mb_8                 False
vol_2g_mb_9                 False
vol_3g_mb_6                 False
vol_3g_mb_7                 False
vol_3g_mb_8                 False
vol_3g_mb_9                 False
arpu_3g_6                    True
arpu_3g_7                    True
arpu_3g_8                    True
arpu_3g_9                    True
arpu_2g_6                    True
arpu_2g_7                    True
arpu_2g_8                    True
arpu_2g_9                    True
night_pck_user_6            False
night_pck_user_7            False
night_pck_user_8            False
night_pck_user_9            False
monthly_2g_6                False
monthly_2g_7                False
monthly_2g_8                False
monthly_2g_9                False
sachet_2g_6                 False
sachet_2g_7                 False
sachet_2g_8                 False
sachet_2g_9                 False
monthly_3g_6                False
monthly_3g_7                False
monthly_3g_8                False
monthly_3g_9                False
sachet_3g_6                 False
sachet_3g_7                 False
sachet_3g_8                 False
sachet_3g_9                 False
fb_user_6                   False
fb_user_7                   False
fb_user_8                   False
fb_user_9                   False
aon                         False
aug_vbc_3g                  False
jul_vbc_3g                  False
jun_vbc_3g                  False
sep_vbc_3g                  False
dtype: bool

In [0]:
j=["date_of_last_rech_data_6",'date_of_last_rech_data_7',
      'date_of_last_rech_data_8','date_of_last_rech_data_9']
churn[j]=churn[j].apply(lambda x: x.fillna(0))

There are many columns with more than 70% null values These columns don't add any value to the data analysis, so lets remove them


In [471]:
#now lets remove these columns
removecolumn = churn.columns[100*(churn.isnull().sum()/len(churn.index)) > 70]
print(removecolumn)


Index(['count_rech_2g_6', 'count_rech_2g_7', 'count_rech_2g_8', 'count_rech_2g_9', 'count_rech_3g_6', 'count_rech_3g_7', 'count_rech_3g_8', 'count_rech_3g_9', 'arpu_3g_6', 'arpu_3g_7', 'arpu_3g_8', 'arpu_3g_9', 'arpu_2g_6', 'arpu_2g_7', 'arpu_2g_8', 'arpu_2g_9'], dtype='object')

In [472]:
churn=churn.drop(removecolumn,axis=1)
churn.shape


Out[472]:
(99999, 210)

In [473]:
#lets drop id columns as well
churn=churn.drop(churn_id,axis=1)
churn.shape


Out[473]:
(99999, 208)

In [474]:
#identifying columns with missing values >0
x=churn.columns[100*(churn.isnull().sum()/len(churn.index)) > 0]
x


Out[474]:
Index(['loc_og_t2o_mou', 'std_og_t2o_mou', 'loc_ic_t2o_mou', 'last_date_of_month_7', 'last_date_of_month_8', 'last_date_of_month_9', 'onnet_mou_6', 'onnet_mou_7', 'onnet_mou_8', 'onnet_mou_9',
       ...
       'isd_ic_mou_8', 'isd_ic_mou_9', 'ic_others_6', 'ic_others_7', 'ic_others_8', 'ic_others_9', 'date_of_last_rech_6', 'date_of_last_rech_7', 'date_of_last_rech_8', 'date_of_last_rech_9'], dtype='object', length=126)

In [0]:
#imputing with 0
churn[x]=churn[x].apply(lambda x: x.fillna(0))

In [476]:
round(churn.isnull().sum()/len(churn.index),4)*100


Out[476]:
loc_og_t2o_mou              0.0
std_og_t2o_mou              0.0
loc_ic_t2o_mou              0.0
last_date_of_month_6        0.0
last_date_of_month_7        0.0
last_date_of_month_8        0.0
last_date_of_month_9        0.0
arpu_6                      0.0
arpu_7                      0.0
arpu_8                      0.0
arpu_9                      0.0
onnet_mou_6                 0.0
onnet_mou_7                 0.0
onnet_mou_8                 0.0
onnet_mou_9                 0.0
offnet_mou_6                0.0
offnet_mou_7                0.0
offnet_mou_8                0.0
offnet_mou_9                0.0
roam_ic_mou_6               0.0
roam_ic_mou_7               0.0
roam_ic_mou_8               0.0
roam_ic_mou_9               0.0
roam_og_mou_6               0.0
roam_og_mou_7               0.0
roam_og_mou_8               0.0
roam_og_mou_9               0.0
loc_og_t2t_mou_6            0.0
loc_og_t2t_mou_7            0.0
loc_og_t2t_mou_8            0.0
loc_og_t2t_mou_9            0.0
loc_og_t2m_mou_6            0.0
loc_og_t2m_mou_7            0.0
loc_og_t2m_mou_8            0.0
loc_og_t2m_mou_9            0.0
loc_og_t2f_mou_6            0.0
loc_og_t2f_mou_7            0.0
loc_og_t2f_mou_8            0.0
loc_og_t2f_mou_9            0.0
loc_og_t2c_mou_6            0.0
loc_og_t2c_mou_7            0.0
loc_og_t2c_mou_8            0.0
loc_og_t2c_mou_9            0.0
loc_og_mou_6                0.0
loc_og_mou_7                0.0
loc_og_mou_8                0.0
loc_og_mou_9                0.0
std_og_t2t_mou_6            0.0
std_og_t2t_mou_7            0.0
std_og_t2t_mou_8            0.0
std_og_t2t_mou_9            0.0
std_og_t2m_mou_6            0.0
std_og_t2m_mou_7            0.0
std_og_t2m_mou_8            0.0
std_og_t2m_mou_9            0.0
std_og_t2f_mou_6            0.0
std_og_t2f_mou_7            0.0
std_og_t2f_mou_8            0.0
std_og_t2f_mou_9            0.0
std_og_t2c_mou_6            0.0
std_og_t2c_mou_7            0.0
std_og_t2c_mou_8            0.0
std_og_t2c_mou_9            0.0
std_og_mou_6                0.0
std_og_mou_7                0.0
std_og_mou_8                0.0
std_og_mou_9                0.0
isd_og_mou_6                0.0
isd_og_mou_7                0.0
isd_og_mou_8                0.0
isd_og_mou_9                0.0
spl_og_mou_6                0.0
spl_og_mou_7                0.0
spl_og_mou_8                0.0
spl_og_mou_9                0.0
og_others_6                 0.0
og_others_7                 0.0
og_others_8                 0.0
og_others_9                 0.0
total_og_mou_6              0.0
total_og_mou_7              0.0
total_og_mou_8              0.0
total_og_mou_9              0.0
loc_ic_t2t_mou_6            0.0
loc_ic_t2t_mou_7            0.0
loc_ic_t2t_mou_8            0.0
loc_ic_t2t_mou_9            0.0
loc_ic_t2m_mou_6            0.0
loc_ic_t2m_mou_7            0.0
loc_ic_t2m_mou_8            0.0
loc_ic_t2m_mou_9            0.0
loc_ic_t2f_mou_6            0.0
loc_ic_t2f_mou_7            0.0
loc_ic_t2f_mou_8            0.0
loc_ic_t2f_mou_9            0.0
loc_ic_mou_6                0.0
loc_ic_mou_7                0.0
loc_ic_mou_8                0.0
loc_ic_mou_9                0.0
std_ic_t2t_mou_6            0.0
std_ic_t2t_mou_7            0.0
std_ic_t2t_mou_8            0.0
std_ic_t2t_mou_9            0.0
std_ic_t2m_mou_6            0.0
std_ic_t2m_mou_7            0.0
std_ic_t2m_mou_8            0.0
std_ic_t2m_mou_9            0.0
std_ic_t2f_mou_6            0.0
std_ic_t2f_mou_7            0.0
std_ic_t2f_mou_8            0.0
std_ic_t2f_mou_9            0.0
std_ic_t2o_mou_6            0.0
std_ic_t2o_mou_7            0.0
std_ic_t2o_mou_8            0.0
std_ic_t2o_mou_9            0.0
std_ic_mou_6                0.0
std_ic_mou_7                0.0
std_ic_mou_8                0.0
std_ic_mou_9                0.0
total_ic_mou_6              0.0
total_ic_mou_7              0.0
total_ic_mou_8              0.0
total_ic_mou_9              0.0
spl_ic_mou_6                0.0
spl_ic_mou_7                0.0
spl_ic_mou_8                0.0
spl_ic_mou_9                0.0
isd_ic_mou_6                0.0
isd_ic_mou_7                0.0
isd_ic_mou_8                0.0
isd_ic_mou_9                0.0
ic_others_6                 0.0
ic_others_7                 0.0
ic_others_8                 0.0
ic_others_9                 0.0
total_rech_num_6            0.0
total_rech_num_7            0.0
total_rech_num_8            0.0
total_rech_num_9            0.0
total_rech_amt_6            0.0
total_rech_amt_7            0.0
total_rech_amt_8            0.0
total_rech_amt_9            0.0
max_rech_amt_6              0.0
max_rech_amt_7              0.0
max_rech_amt_8              0.0
max_rech_amt_9              0.0
date_of_last_rech_6         0.0
date_of_last_rech_7         0.0
date_of_last_rech_8         0.0
date_of_last_rech_9         0.0
last_day_rch_amt_6          0.0
last_day_rch_amt_7          0.0
last_day_rch_amt_8          0.0
last_day_rch_amt_9          0.0
date_of_last_rech_data_6    0.0
date_of_last_rech_data_7    0.0
date_of_last_rech_data_8    0.0
date_of_last_rech_data_9    0.0
total_rech_data_6           0.0
total_rech_data_7           0.0
total_rech_data_8           0.0
total_rech_data_9           0.0
max_rech_data_6             0.0
max_rech_data_7             0.0
max_rech_data_8             0.0
max_rech_data_9             0.0
av_rech_amt_data_6          0.0
av_rech_amt_data_7          0.0
av_rech_amt_data_8          0.0
av_rech_amt_data_9          0.0
vol_2g_mb_6                 0.0
vol_2g_mb_7                 0.0
vol_2g_mb_8                 0.0
vol_2g_mb_9                 0.0
vol_3g_mb_6                 0.0
vol_3g_mb_7                 0.0
vol_3g_mb_8                 0.0
vol_3g_mb_9                 0.0
night_pck_user_6            0.0
night_pck_user_7            0.0
night_pck_user_8            0.0
night_pck_user_9            0.0
monthly_2g_6                0.0
monthly_2g_7                0.0
monthly_2g_8                0.0
monthly_2g_9                0.0
sachet_2g_6                 0.0
sachet_2g_7                 0.0
sachet_2g_8                 0.0
sachet_2g_9                 0.0
monthly_3g_6                0.0
monthly_3g_7                0.0
monthly_3g_8                0.0
monthly_3g_9                0.0
sachet_3g_6                 0.0
sachet_3g_7                 0.0
sachet_3g_8                 0.0
sachet_3g_9                 0.0
fb_user_6                   0.0
fb_user_7                   0.0
fb_user_8                   0.0
fb_user_9                   0.0
aon                         0.0
aug_vbc_3g                  0.0
jul_vbc_3g                  0.0
jun_vbc_3g                  0.0
sep_vbc_3g                  0.0
dtype: float64

In [477]:
churn.nunique()


Out[477]:
loc_og_t2o_mou                  1
std_og_t2o_mou                  1
loc_ic_t2o_mou                  1
last_date_of_month_6            1
last_date_of_month_7            2
last_date_of_month_8            2
last_date_of_month_9            2
arpu_6                      85681
arpu_7                      85308
arpu_8                      83615
arpu_9                      79937
onnet_mou_6                 24313
onnet_mou_7                 24336
onnet_mou_8                 24089
onnet_mou_9                 23565
offnet_mou_6                31140
offnet_mou_7                31023
offnet_mou_8                30908
offnet_mou_9                30077
roam_ic_mou_6                6512
roam_ic_mou_7                5230
roam_ic_mou_8                5315
roam_ic_mou_9                4827
roam_og_mou_6                8038
roam_og_mou_7                6639
roam_og_mou_8                6504
roam_og_mou_9                5882
loc_og_t2t_mou_6            13539
loc_og_t2t_mou_7            13411
loc_og_t2t_mou_8            13336
loc_og_t2t_mou_9            12949
loc_og_t2m_mou_6            20905
loc_og_t2m_mou_7            20637
loc_og_t2m_mou_8            20544
loc_og_t2m_mou_9            20141
loc_og_t2f_mou_6             3860
loc_og_t2f_mou_7             3863
loc_og_t2f_mou_8             3807
loc_og_t2f_mou_9             3758
loc_og_t2c_mou_6             2235
loc_og_t2c_mou_7             2426
loc_og_t2c_mou_8             2516
loc_og_t2c_mou_9             2332
loc_og_mou_6                26372
loc_og_mou_7                26091
loc_og_mou_8                25990
loc_og_mou_9                25376
std_og_t2t_mou_6            18244
std_og_t2t_mou_7            18567
std_og_t2t_mou_8            18291
std_og_t2t_mou_9            17934
std_og_t2m_mou_6            19734
std_og_t2m_mou_7            20018
std_og_t2m_mou_8            19786
std_og_t2m_mou_9            19052
std_og_t2f_mou_6             2450
std_og_t2f_mou_7             2391
std_og_t2f_mou_8             2333
std_og_t2f_mou_9             2295
std_og_t2c_mou_6                1
std_og_t2c_mou_7                1
std_og_t2c_mou_8                1
std_og_t2c_mou_9                1
std_og_mou_6                27502
std_og_mou_7                27951
std_og_mou_8                27491
std_og_mou_9                26553
isd_og_mou_6                 1381
isd_og_mou_7                 1380
isd_og_mou_8                 1276
isd_og_mou_9                 1255
spl_og_mou_6                 3965
spl_og_mou_7                 4396
spl_og_mou_8                 4390
spl_og_mou_9                 4095
og_others_6                  1018
og_others_7                   187
og_others_8                   216
og_others_9                   235
total_og_mou_6              40327
total_og_mou_7              40477
total_og_mou_8              40074
total_og_mou_9              39160
loc_ic_t2t_mou_6            13540
loc_ic_t2t_mou_7            13511
loc_ic_t2t_mou_8            13346
loc_ic_t2t_mou_9            12993
loc_ic_t2m_mou_6            22065
loc_ic_t2m_mou_7            21918
loc_ic_t2m_mou_8            21886
loc_ic_t2m_mou_9            21484
loc_ic_t2f_mou_6             7250
loc_ic_t2f_mou_7             7395
loc_ic_t2f_mou_8             7097
loc_ic_t2f_mou_9             7091
loc_ic_mou_6                28569
loc_ic_mou_7                28390
loc_ic_mou_8                28200
loc_ic_mou_9                27697
std_ic_t2t_mou_6             6279
std_ic_t2t_mou_7             6481
std_ic_t2t_mou_8             6352
std_ic_t2t_mou_9             6157
std_ic_t2m_mou_6             9308
std_ic_t2m_mou_7             9464
std_ic_t2m_mou_8             9304
std_ic_t2m_mou_9             8933
std_ic_t2f_mou_6             3125
std_ic_t2f_mou_7             3209
std_ic_t2f_mou_8             3051
std_ic_t2f_mou_9             3090
std_ic_t2o_mou_6                1
std_ic_t2o_mou_7                1
std_ic_t2o_mou_8                1
std_ic_t2o_mou_9                1
std_ic_mou_6                11646
std_ic_mou_7                11889
std_ic_mou_8                11662
std_ic_mou_9                11266
total_ic_mou_6              32247
total_ic_mou_7              32242
total_ic_mou_8              32128
total_ic_mou_9              31260
spl_ic_mou_6                   84
spl_ic_mou_7                  107
spl_ic_mou_8                  102
spl_ic_mou_9                  384
isd_ic_mou_6                 5521
isd_ic_mou_7                 5789
isd_ic_mou_8                 5844
isd_ic_mou_9                 5557
ic_others_6                  1817
ic_others_7                  2002
ic_others_8                  1896
ic_others_9                  1923
total_rech_num_6              102
total_rech_num_7              101
total_rech_num_8               96
total_rech_num_9               97
total_rech_amt_6             2305
total_rech_amt_7             2329
total_rech_amt_8             2347
total_rech_amt_9             2304
max_rech_amt_6                202
max_rech_amt_7                183
max_rech_amt_8                213
max_rech_amt_9                201
date_of_last_rech_6            31
date_of_last_rech_7            32
date_of_last_rech_8            32
date_of_last_rech_9            31
last_day_rch_amt_6            186
last_day_rch_amt_7            173
last_day_rch_amt_8            199
last_day_rch_amt_9            185
date_of_last_rech_data_6       31
date_of_last_rech_data_7       32
date_of_last_rech_data_8       32
date_of_last_rech_data_9       31
total_rech_data_6              38
total_rech_data_7              43
total_rech_data_8              47
total_rech_data_9              38
max_rech_data_6                49
max_rech_data_7                49
max_rech_data_8                51
max_rech_data_9                51
av_rech_amt_data_6            888
av_rech_amt_data_7            962
av_rech_amt_data_8            974
av_rech_amt_data_9            946
vol_2g_mb_6                 15201
vol_2g_mb_7                 15114
vol_2g_mb_8                 14994
vol_2g_mb_9                 13919
vol_3g_mb_6                 13773
vol_3g_mb_7                 14519
vol_3g_mb_8                 14960
vol_3g_mb_9                 14472
night_pck_user_6                3
night_pck_user_7                3
night_pck_user_8                3
night_pck_user_9                3
monthly_2g_6                    5
monthly_2g_7                    6
monthly_2g_8                    6
monthly_2g_9                    5
sachet_2g_6                    32
sachet_2g_7                    35
sachet_2g_8                    34
sachet_2g_9                    32
monthly_3g_6                   12
monthly_3g_7                   15
monthly_3g_8                   12
monthly_3g_9                   11
sachet_3g_6                    25
sachet_3g_7                    27
sachet_3g_8                    29
sachet_3g_9                    27
fb_user_6                       3
fb_user_7                       3
fb_user_8                       3
fb_user_9                       3
aon                          3489
aug_vbc_3g                  14676
jul_vbc_3g                  14162
jun_vbc_3g                  13312
sep_vbc_3g                   3720
dtype: int64

In [478]:
#dropping columns with only one unique value as they don'y add value
churn = churn.drop(['loc_og_t2o_mou', 'std_og_t2o_mou', 'loc_ic_t2o_mou'],axis=1)
churn.shape


Out[478]:
(99999, 205)

Identifying High value customers

we can filter high value customers based on the amount of recharge they have done in the good months (6&7)
total recharge amount = data recharge + normal recharge amount
data recharge = total_rech_data*av_rech_amt_data


In [0]:
#data recharge
churn['total_data_rech_6'] = churn['total_rech_data_6'] * churn['av_rech_amt_data_6']
churn['total_data_rech_7'] = churn['total_rech_data_7'] * churn['av_rech_amt_data_7']

In [0]:
#total recharge
#Hemant : does total_rech_amt doesnt already include total_data_rech ?
churn['amt_data_6'] = churn[['total_rech_amt_6','total_data_rech_6']].sum(axis=1)
churn['amt_data_7'] = churn[['total_rech_amt_7','total_data_rech_7']].sum(axis=1)

In [481]:
#average of both months to filter the customers
churn['AVG_amt_data_6_7'] = churn[['amt_data_6','amt_data_7']].mean(axis=1)
churn[['total_rech_amt_6','total_rech_amt_7','AVG_amt_data_6_7']].quantile(np.linspace(.1, 1, 9, 0))


Out[481]:
total_rech_amt_6 total_rech_amt_7 AVG_amt_data_6_7
0.1 0.0 0.0 60.0
0.2 80.0 65.0 108.5
0.3 120.0 110.0 155.0
0.4 170.0 160.0 209.5
0.5 230.0 220.0 275.0
0.6 290.0 283.0 360.0
0.7 378.0 370.0 478.0
0.8 504.0 500.0 656.5
0.9 732.0 731.0 1028.0

In [482]:
churn_highvalue = churn.loc[churn.AVG_amt_data_6_7 >= churn.AVG_amt_data_6_7.quantile(0.7), :]
churn_highvalue = churn_highvalue.reset_index(drop=True)
churn_highvalue.shape


Out[482]:
(30001, 210)

Tagging churn vs non-churn

customers whose outgoing and incoming calls minutes = 0
and useage of data 2g and 3g is 0, those customers are tagged as churned or 1


In [0]:
#tagging churn vs non churn 
# Hemant , changed to AND 
churn_highvalue['churn_tag'] = np.where(
                            ((churn_highvalue['total_ic_mou_9'] == 0.00) & (churn_highvalue['total_og_mou_9'] == 0.00))
                            & ((churn_highvalue['vol_2g_mb_9'] == 0.00) & (churn_highvalue['vol_3g_mb_9'] == 0.00))
                            , 1, 0)

In [484]:
# change data type to category
churn_highvalue.churn_tag = churn_highvalue.churn_tag.astype("category")

# print churn ratio
print("Churn Ratio:")
print(churn_highvalue.churn_tag.value_counts()*100/churn_highvalue.shape[0])


Churn Ratio:
0    91.863605
1     8.136395
Name: churn_tag, dtype: float64

Before proceeding lets drop all the 9th month columns and, the columns of 6,7 that are not required


In [0]:
#removing all the 9th month columns
churn_highvalue = churn_highvalue.drop(churn_highvalue.filter(regex='_9|sep', axis = 1).columns, axis=1)

In [486]:
# delete variables created to filter high-value customers
churn_highvalue = churn_highvalue.drop(['total_data_rech_6', 'total_data_rech_7',
                                      'amt_data_6', 'amt_data_7', 'AVG_amt_data_6_7'], axis=1)
churn_highvalue.shape


Out[486]:
(30001, 155)

In [487]:
round((churn_highvalue.shape[0]/churn.shape[0]),4)*100


Out[487]:
30.0

30% of customers are high value


In [488]:
churn_highvalue.info(verbose="true")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30001 entries, 0 to 30000
Data columns (total 155 columns):
 #   Column                    Dtype   
---  ------                    -----   
 0   last_date_of_month_6      object  
 1   last_date_of_month_7      object  
 2   last_date_of_month_8      object  
 3   arpu_6                    float64 
 4   arpu_7                    float64 
 5   arpu_8                    float64 
 6   onnet_mou_6               float64 
 7   onnet_mou_7               float64 
 8   onnet_mou_8               float64 
 9   offnet_mou_6              float64 
 10  offnet_mou_7              float64 
 11  offnet_mou_8              float64 
 12  roam_ic_mou_6             float64 
 13  roam_ic_mou_7             float64 
 14  roam_ic_mou_8             float64 
 15  roam_og_mou_6             float64 
 16  roam_og_mou_7             float64 
 17  roam_og_mou_8             float64 
 18  loc_og_t2t_mou_6          float64 
 19  loc_og_t2t_mou_7          float64 
 20  loc_og_t2t_mou_8          float64 
 21  loc_og_t2m_mou_6          float64 
 22  loc_og_t2m_mou_7          float64 
 23  loc_og_t2m_mou_8          float64 
 24  loc_og_t2f_mou_6          float64 
 25  loc_og_t2f_mou_7          float64 
 26  loc_og_t2f_mou_8          float64 
 27  loc_og_t2c_mou_6          float64 
 28  loc_og_t2c_mou_7          float64 
 29  loc_og_t2c_mou_8          float64 
 30  loc_og_mou_6              float64 
 31  loc_og_mou_7              float64 
 32  loc_og_mou_8              float64 
 33  std_og_t2t_mou_6          float64 
 34  std_og_t2t_mou_7          float64 
 35  std_og_t2t_mou_8          float64 
 36  std_og_t2m_mou_6          float64 
 37  std_og_t2m_mou_7          float64 
 38  std_og_t2m_mou_8          float64 
 39  std_og_t2f_mou_6          float64 
 40  std_og_t2f_mou_7          float64 
 41  std_og_t2f_mou_8          float64 
 42  std_og_t2c_mou_6          float64 
 43  std_og_t2c_mou_7          float64 
 44  std_og_t2c_mou_8          float64 
 45  std_og_mou_6              float64 
 46  std_og_mou_7              float64 
 47  std_og_mou_8              float64 
 48  isd_og_mou_6              float64 
 49  isd_og_mou_7              float64 
 50  isd_og_mou_8              float64 
 51  spl_og_mou_6              float64 
 52  spl_og_mou_7              float64 
 53  spl_og_mou_8              float64 
 54  og_others_6               float64 
 55  og_others_7               float64 
 56  og_others_8               float64 
 57  total_og_mou_6            float64 
 58  total_og_mou_7            float64 
 59  total_og_mou_8            float64 
 60  loc_ic_t2t_mou_6          float64 
 61  loc_ic_t2t_mou_7          float64 
 62  loc_ic_t2t_mou_8          float64 
 63  loc_ic_t2m_mou_6          float64 
 64  loc_ic_t2m_mou_7          float64 
 65  loc_ic_t2m_mou_8          float64 
 66  loc_ic_t2f_mou_6          float64 
 67  loc_ic_t2f_mou_7          float64 
 68  loc_ic_t2f_mou_8          float64 
 69  loc_ic_mou_6              float64 
 70  loc_ic_mou_7              float64 
 71  loc_ic_mou_8              float64 
 72  std_ic_t2t_mou_6          float64 
 73  std_ic_t2t_mou_7          float64 
 74  std_ic_t2t_mou_8          float64 
 75  std_ic_t2m_mou_6          float64 
 76  std_ic_t2m_mou_7          float64 
 77  std_ic_t2m_mou_8          float64 
 78  std_ic_t2f_mou_6          float64 
 79  std_ic_t2f_mou_7          float64 
 80  std_ic_t2f_mou_8          float64 
 81  std_ic_t2o_mou_6          float64 
 82  std_ic_t2o_mou_7          float64 
 83  std_ic_t2o_mou_8          float64 
 84  std_ic_mou_6              float64 
 85  std_ic_mou_7              float64 
 86  std_ic_mou_8              float64 
 87  total_ic_mou_6            float64 
 88  total_ic_mou_7            float64 
 89  total_ic_mou_8            float64 
 90  spl_ic_mou_6              float64 
 91  spl_ic_mou_7              float64 
 92  spl_ic_mou_8              float64 
 93  isd_ic_mou_6              float64 
 94  isd_ic_mou_7              float64 
 95  isd_ic_mou_8              float64 
 96  ic_others_6               float64 
 97  ic_others_7               float64 
 98  ic_others_8               float64 
 99  total_rech_num_6          int64   
 100 total_rech_num_7          int64   
 101 total_rech_num_8          int64   
 102 total_rech_amt_6          int64   
 103 total_rech_amt_7          int64   
 104 total_rech_amt_8          int64   
 105 max_rech_amt_6            int64   
 106 max_rech_amt_7            int64   
 107 max_rech_amt_8            int64   
 108 date_of_last_rech_6       object  
 109 date_of_last_rech_7       object  
 110 date_of_last_rech_8       object  
 111 last_day_rch_amt_6        int64   
 112 last_day_rch_amt_7        int64   
 113 last_day_rch_amt_8        int64   
 114 date_of_last_rech_data_6  object  
 115 date_of_last_rech_data_7  object  
 116 date_of_last_rech_data_8  object  
 117 total_rech_data_6         float64 
 118 total_rech_data_7         float64 
 119 total_rech_data_8         float64 
 120 max_rech_data_6           float64 
 121 max_rech_data_7           float64 
 122 max_rech_data_8           float64 
 123 av_rech_amt_data_6        float64 
 124 av_rech_amt_data_7        float64 
 125 av_rech_amt_data_8        float64 
 126 vol_2g_mb_6               float64 
 127 vol_2g_mb_7               float64 
 128 vol_2g_mb_8               float64 
 129 vol_3g_mb_6               float64 
 130 vol_3g_mb_7               float64 
 131 vol_3g_mb_8               float64 
 132 night_pck_user_6          float64 
 133 night_pck_user_7          float64 
 134 night_pck_user_8          float64 
 135 monthly_2g_6              int64   
 136 monthly_2g_7              int64   
 137 monthly_2g_8              int64   
 138 sachet_2g_6               int64   
 139 sachet_2g_7               int64   
 140 sachet_2g_8               int64   
 141 monthly_3g_6              int64   
 142 monthly_3g_7              int64   
 143 monthly_3g_8              int64   
 144 sachet_3g_6               int64   
 145 sachet_3g_7               int64   
 146 sachet_3g_8               int64   
 147 fb_user_6                 float64 
 148 fb_user_7                 float64 
 149 fb_user_8                 float64 
 150 aon                       int64   
 151 aug_vbc_3g                float64 
 152 jul_vbc_3g                float64 
 153 jun_vbc_3g                float64 
 154 churn_tag                 category
dtypes: category(1), float64(120), int64(25), object(9)
memory usage: 35.3+ MB

In [489]:
churn_v1 = churn_highvalue
churn_v1


Out[489]:
last_date_of_month_6 last_date_of_month_7 last_date_of_month_8 arpu_6 arpu_7 arpu_8 onnet_mou_6 onnet_mou_7 onnet_mou_8 offnet_mou_6 offnet_mou_7 offnet_mou_8 roam_ic_mou_6 roam_ic_mou_7 roam_ic_mou_8 roam_og_mou_6 roam_og_mou_7 roam_og_mou_8 loc_og_t2t_mou_6 loc_og_t2t_mou_7 loc_og_t2t_mou_8 loc_og_t2m_mou_6 loc_og_t2m_mou_7 loc_og_t2m_mou_8 loc_og_t2f_mou_6 loc_og_t2f_mou_7 loc_og_t2f_mou_8 loc_og_t2c_mou_6 loc_og_t2c_mou_7 loc_og_t2c_mou_8 loc_og_mou_6 loc_og_mou_7 loc_og_mou_8 std_og_t2t_mou_6 std_og_t2t_mou_7 std_og_t2t_mou_8 std_og_t2m_mou_6 std_og_t2m_mou_7 std_og_t2m_mou_8 std_og_t2f_mou_6 std_og_t2f_mou_7 std_og_t2f_mou_8 std_og_t2c_mou_6 std_og_t2c_mou_7 std_og_t2c_mou_8 std_og_mou_6 std_og_mou_7 std_og_mou_8 isd_og_mou_6 isd_og_mou_7 isd_og_mou_8 spl_og_mou_6 spl_og_mou_7 spl_og_mou_8 og_others_6 og_others_7 og_others_8 total_og_mou_6 total_og_mou_7 total_og_mou_8 loc_ic_t2t_mou_6 loc_ic_t2t_mou_7 loc_ic_t2t_mou_8 loc_ic_t2m_mou_6 loc_ic_t2m_mou_7 loc_ic_t2m_mou_8 loc_ic_t2f_mou_6 loc_ic_t2f_mou_7 loc_ic_t2f_mou_8 loc_ic_mou_6 loc_ic_mou_7 loc_ic_mou_8 std_ic_t2t_mou_6 std_ic_t2t_mou_7 std_ic_t2t_mou_8 std_ic_t2m_mou_6 std_ic_t2m_mou_7 std_ic_t2m_mou_8 std_ic_t2f_mou_6 std_ic_t2f_mou_7 std_ic_t2f_mou_8 std_ic_t2o_mou_6 std_ic_t2o_mou_7 std_ic_t2o_mou_8 std_ic_mou_6 std_ic_mou_7 std_ic_mou_8 total_ic_mou_6 total_ic_mou_7 total_ic_mou_8 spl_ic_mou_6 spl_ic_mou_7 spl_ic_mou_8 isd_ic_mou_6 isd_ic_mou_7 isd_ic_mou_8 ic_others_6 ic_others_7 ic_others_8 total_rech_num_6 total_rech_num_7 total_rech_num_8 total_rech_amt_6 total_rech_amt_7 total_rech_amt_8 max_rech_amt_6 max_rech_amt_7 max_rech_amt_8 date_of_last_rech_6 date_of_last_rech_7 date_of_last_rech_8 last_day_rch_amt_6 last_day_rch_amt_7 last_day_rch_amt_8 date_of_last_rech_data_6 date_of_last_rech_data_7 date_of_last_rech_data_8 total_rech_data_6 total_rech_data_7 total_rech_data_8 max_rech_data_6 max_rech_data_7 max_rech_data_8 av_rech_amt_data_6 av_rech_amt_data_7 av_rech_amt_data_8 vol_2g_mb_6 vol_2g_mb_7 vol_2g_mb_8 vol_3g_mb_6 vol_3g_mb_7 vol_3g_mb_8 night_pck_user_6 night_pck_user_7 night_pck_user_8 monthly_2g_6 monthly_2g_7 monthly_2g_8 sachet_2g_6 sachet_2g_7 sachet_2g_8 monthly_3g_6 monthly_3g_7 monthly_3g_8 sachet_3g_6 sachet_3g_7 sachet_3g_8 fb_user_6 fb_user_7 fb_user_8 aon aug_vbc_3g jul_vbc_3g jun_vbc_3g churn_tag
0 6/30/2014 7/31/2014 8/31/2014 197.385 214.816 213.803 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.16 0.00 0.00 4.13 0.00 0.00 1.15 0.00 0.00 5.44 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.00 0.00 0.00 0.00 0.00 5.44 0.00 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 4 3 2 362 252 252 252 252 252 6/21/2014 7/16/2014 8/8/2014 252 252 252 6/21/2014 7/16/2014 8/8/2014 1.0 1.0 1.0 252.0 252.0 252.0 252.0 252.0 252.0 30.13 1.32 5.75 83.57 150.76 109.61 0.0 0.0 0.0 0 0 0 0 0 0 1 1 1 0 0 0 1.0 1.0 1.0 968 30.40 0.00 101.20 1
1 6/30/2014 7/31/2014 8/31/2014 1069.180 1349.850 3171.480 57.84 54.68 52.29 453.43 567.16 325.91 16.23 33.49 31.64 23.74 12.59 38.06 51.39 31.38 40.28 308.63 447.38 162.28 62.13 55.14 53.23 0.00 0.00 0.00 422.16 533.91 255.79 4.30 23.29 12.01 49.89 31.76 49.14 6.66 20.08 16.68 0.0 0.0 0.0 60.86 75.14 77.84 0.0 0.18 10.01 4.50 0.00 6.50 0.00 0.0 0.0 487.53 609.24 350.16 58.14 32.26 27.31 217.56 221.49 121.19 152.16 101.46 39.53 427.88 355.23 188.04 36.89 11.83 30.39 91.44 126.99 141.33 52.19 34.24 22.21 0.0 0.0 0.0 180.54 173.08 193.94 626.46 558.04 428.74 0.21 0.0 0.0 2.06 14.53 31.59 15.74 15.19 15.14 5 5 7 1580 790 3638 1580 790 1580 6/27/2014 7/25/2014 8/26/2014 0 0 779 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 -1.0 -1.0 -1.0 0 0 0 0 0 0 0 0 0 0 0 0 -1.0 -1.0 -1.0 802 57.74 19.38 18.74 1
2 6/30/2014 7/31/2014 8/31/2014 378.721 492.223 137.362 413.69 351.03 35.08 94.66 80.63 136.48 0.00 0.00 0.00 0.00 0.00 0.00 297.13 217.59 12.49 80.96 70.58 50.54 0.00 0.00 0.00 0.00 0.00 7.15 378.09 288.18 63.04 116.56 133.43 22.58 13.69 10.04 75.69 0.00 0.00 0.00 0.0 0.0 0.0 130.26 143.48 98.28 0.0 0.00 0.00 0.00 0.00 10.23 0.00 0.0 0.0 508.36 431.66 171.56 23.84 9.84 0.31 57.58 13.98 15.48 0.00 0.00 0.00 81.43 23.83 15.79 0.00 0.58 0.10 22.43 4.08 0.65 0.00 0.00 0.00 0.0 0.0 0.0 22.43 4.66 0.75 103.86 28.49 16.54 0.00 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 19 21 14 437 601 120 90 154 30 6/25/2014 7/31/2014 8/30/2014 50 0 10 0 7/31/2014 8/23/2014 0.0 2.0 3.0 0.0 154.0 23.0 0.0 177.0 69.0 0.00 356.00 0.03 0.00 750.95 11.94 -1.0 0.0 0.0 0 1 0 0 1 3 0 0 0 0 0 0 -1.0 1.0 1.0 315 21.03 910.65 122.16 0
3 6/30/2014 7/31/2014 8/31/2014 514.453 597.753 637.760 102.41 132.11 85.14 757.93 896.68 983.39 0.00 0.00 0.00 0.00 0.00 0.00 4.48 6.16 23.34 91.81 87.93 104.81 0.75 0.00 1.58 0.00 0.00 0.00 97.04 94.09 129.74 97.93 125.94 61.79 665.36 808.74 876.99 0.00 0.00 0.00 0.0 0.0 0.0 763.29 934.69 938.79 0.0 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.0 860.34 1028.79 1068.54 2.48 10.19 19.54 118.23 74.63 129.16 4.61 2.84 10.39 125.33 87.68 159.11 14.06 5.98 0.18 67.69 38.23 101.74 0.00 0.00 0.00 0.0 0.0 0.0 81.76 44.21 101.93 207.09 131.89 261.04 0.00 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 22 26 27 600 680 718 50 50 50 6/30/2014 7/31/2014 8/31/2014 30 20 50 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 -1.0 -1.0 -1.0 0 0 0 0 0 0 0 0 0 0 0 0 -1.0 -1.0 -1.0 720 0.00 0.00 0.00 0
4 6/30/2014 7/31/2014 8/31/2014 74.350 193.897 366.966 48.96 50.66 33.58 85.41 89.36 205.89 0.00 0.00 0.00 0.00 0.00 0.00 48.96 50.66 33.58 82.94 83.01 148.56 0.00 0.00 0.00 0.00 0.00 17.71 131.91 133.68 182.14 0.00 0.00 0.00 2.36 6.35 39.61 0.00 0.00 0.00 0.0 0.0 0.0 2.36 6.35 39.61 0.0 0.01 0.00 0.10 0.00 17.71 0.00 0.0 0.0 134.38 140.04 239.48 20.71 61.04 76.64 95.91 113.36 146.84 0.00 0.00 0.71 116.63 174.41 224.21 0.51 0.00 13.38 2.43 14.89 43.91 0.00 0.00 0.00 0.0 0.0 0.0 2.94 14.89 57.29 119.58 222.89 298.33 0.00 0.0 0.0 0.00 28.23 3.74 0.00 5.35 13.06 3 5 4 0 454 439 0 179 179 6/18/2014 7/7/2014 8/24/2014 0 179 0 0 7/7/2014 8/6/2014 0.0 2.0 2.0 0.0 179.0 179.0 0.0 356.0 270.0 0.00 0.48 0.01 0.00 599.09 1009.92 -1.0 0.0 0.0 0 0 0 0 0 0 0 2 1 0 0 1 -1.0 1.0 1.0 604 40.45 51.86 0.00 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
29996 6/30/2014 7/31/2014 8/31/2014 384.316 255.405 393.474 78.68 29.04 103.24 56.13 28.09 61.44 0.00 0.00 0.00 0.00 0.00 0.00 72.53 29.04 89.23 52.21 20.89 55.59 0.00 0.00 5.76 3.91 0.00 0.00 124.74 49.94 150.59 6.15 0.00 14.01 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.0 6.15 0.00 14.01 0.0 0.00 0.00 3.91 7.20 0.08 0.00 0.0 0.0 134.81 57.14 164.69 285.33 264.44 303.61 93.36 61.56 58.54 0.30 11.26 40.41 378.99 337.28 402.58 57.60 0.91 17.36 0.00 0.00 0.00 41.59 6.51 0.00 0.0 0.0 0.0 99.19 7.43 17.36 478.48 344.78 420.46 0.13 0.0 0.0 0.00 0.06 0.00 0.15 0.00 0.51 2 4 3 252 372 512 252 252 252 6/17/2014 7/25/2014 8/27/2014 252 0 130 6/17/2014 7/14/2014 8/23/2014 1.0 1.0 1.0 252.0 252.0 252.0 252.0 252.0 252.0 54.81 101.02 112.07 692.72 596.91 1012.70 0.0 0.0 0.0 0 0 0 0 0 0 1 1 1 0 0 0 1.0 1.0 1.0 473 1100.43 619.59 668.05 0
29997 6/30/2014 7/31/2014 8/31/2014 328.594 202.966 118.707 423.99 181.83 5.71 39.51 39.81 18.26 0.00 0.00 0.00 0.00 0.00 0.00 423.99 181.83 5.71 17.96 20.46 10.98 0.00 0.00 0.00 17.04 15.38 7.28 441.96 202.29 16.69 0.00 0.00 0.00 0.03 3.96 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.03 3.96 0.00 0.0 0.00 0.00 21.51 15.38 7.28 1.06 0.0 0.0 464.58 221.64 23.98 32.21 45.14 20.94 53.49 76.76 81.86 0.00 0.00 0.00 85.71 121.91 102.81 0.00 0.00 0.00 2.33 0.75 0.00 0.99 1.04 0.00 0.0 0.0 0.0 3.33 1.79 0.00 89.48 123.71 102.81 0.43 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 8 10 9 360 239 137 154 46 30 6/21/2014 7/31/2014 8/27/2014 0 25 25 6/15/2014 7/31/2014 8/27/2014 1.0 4.0 5.0 154.0 46.0 25.0 154.0 121.0 117.0 1248.50 725.05 202.22 0.00 0.00 0.00 0.0 0.0 0.0 1 0 0 0 4 5 0 0 0 0 0 0 1.0 1.0 1.0 820 0.00 0.00 0.00 0
29998 6/30/2014 7/31/2014 8/31/2014 644.973 455.228 564.334 806.73 549.36 775.41 784.76 617.13 595.44 0.00 0.00 0.00 0.00 0.00 0.00 709.21 496.14 718.56 574.93 546.84 493.48 16.28 19.48 13.01 15.96 16.71 9.15 1300.43 1062.48 1225.06 97.51 53.21 56.84 186.88 50.79 88.94 0.00 0.00 0.00 0.0 0.0 0.0 284.39 104.01 145.79 0.0 0.00 0.00 15.96 16.71 9.15 0.00 0.0 0.0 1600.79 1183.21 1380.01 140.71 104.04 148.21 395.58 475.33 450.01 29.96 38.69 37.61 566.26 618.08 635.84 2.31 0.00 0.00 12.14 1.03 23.71 0.00 2.73 0.45 0.0 0.0 0.0 14.46 3.76 24.16 580.73 622.28 660.01 0.00 0.0 0.0 0.00 0.43 0.00 0.00 0.00 0.00 6 8 3 567 1130 25 550 786 25 6/20/2014 7/28/2014 8/18/2014 550 786 0 6/17/2014 7/18/2014 8/14/2014 1.0 1.0 1.0 17.0 14.0 25.0 17.0 14.0 25.0 34.28 16.41 6.47 736.01 1129.34 926.78 0.0 0.0 0.0 0 0 0 1 1 1 0 0 0 0 0 0 1.0 1.0 1.0 2696 497.45 598.67 604.08 0
29999 6/30/2014 7/31/2014 8/31/2014 312.558 512.932 402.080 199.89 174.46 2.46 175.88 277.01 248.33 0.00 0.00 0.00 0.00 0.00 0.00 170.28 146.48 2.46 137.83 148.78 128.01 0.00 0.00 0.00 0.00 0.00 0.01 308.11 295.26 130.48 29.61 27.98 0.00 38.04 128.23 120.29 0.00 0.00 0.00 0.0 0.0 0.0 67.66 156.21 120.29 0.0 0.00 0.00 0.00 0.00 0.01 0.00 0.0 0.0 375.78 451.48 250.79 47.56 3.90 1.50 245.31 256.46 1122.83 14.43 28.39 20.31 307.31 288.76 1144.64 10.08 13.21 2.03 680.34 72.99 86.11 1.01 0.00 0.71 0.0 0.0 0.0 691.44 86.21 88.86 998.96 374.98 1233.83 0.00 0.0 0.0 0.00 0.00 0.00 0.20 0.00 0.31 12 12 9 380 554 504 50 154 154 6/30/2014 7/28/2014 8/29/2014 50 50 50 0 7/26/2014 8/23/2014 0.0 1.0 1.0 0.0 154.0 154.0 0.0 154.0 154.0 0.00 0.00 0.79 0.00 13.56 678.74 -1.0 0.0 0.0 0 1 1 0 0 0 0 0 0 0 0 0 -1.0 1.0 1.0 328 104.73 0.00 0.00 0
30000 6/30/2014 7/31/2014 8/31/2014 322.991 303.386 606.817 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.00 0.00 0.00 0.00 0.70 0.00 0.81 10.39 1.71 0.00 0.00 0.00 0.81 11.09 1.71 0.00 0.00 0.00 0.00 0.00 0.58 0.00 0.00 0.00 0.0 0.0 0.0 0.00 0.00 0.58 0.81 11.09 2.29 0.00 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00 3 4 4 381 358 716 202 179 179 6/17/2014 7/19/2014 8/20/2014 202 179 179 6/17/2014 7/19/2014 8/20/2014 3.0 2.0 4.0 202.0 179.0 179.0 583.0 358.0 716.0 76.51 241.77 136.47 1453.63 1382.08 2683.30 0.0 0.0 0.0 0 0 0 1 0 0 1 2 4 1 0 0 1.0 1.0 1.0 478 1445.74 1151.03 1173.18 0

30001 rows × 155 columns

EDA


In [0]:
def plotCategoricalVariables(columnName,dataFrame):
    sns.set(style="whitegrid")
    ax = (dataFrame.groupby(columnName).size()/len(dataFrame.index)).plot.bar(color=sns.color_palette('dark', 15))
    ax.set(xlabel= columnName, ylabel='Count')

    plt.show()

In [491]:
plt.figure(figsize=(7,4))
plotCategoricalVariables("churn_tag", churn_v1)



In [492]:
churn_v1.info(verbose=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30001 entries, 0 to 30000
Data columns (total 155 columns):
 #   Column                    Dtype   
---  ------                    -----   
 0   last_date_of_month_6      object  
 1   last_date_of_month_7      object  
 2   last_date_of_month_8      object  
 3   arpu_6                    float64 
 4   arpu_7                    float64 
 5   arpu_8                    float64 
 6   onnet_mou_6               float64 
 7   onnet_mou_7               float64 
 8   onnet_mou_8               float64 
 9   offnet_mou_6              float64 
 10  offnet_mou_7              float64 
 11  offnet_mou_8              float64 
 12  roam_ic_mou_6             float64 
 13  roam_ic_mou_7             float64 
 14  roam_ic_mou_8             float64 
 15  roam_og_mou_6             float64 
 16  roam_og_mou_7             float64 
 17  roam_og_mou_8             float64 
 18  loc_og_t2t_mou_6          float64 
 19  loc_og_t2t_mou_7          float64 
 20  loc_og_t2t_mou_8          float64 
 21  loc_og_t2m_mou_6          float64 
 22  loc_og_t2m_mou_7          float64 
 23  loc_og_t2m_mou_8          float64 
 24  loc_og_t2f_mou_6          float64 
 25  loc_og_t2f_mou_7          float64 
 26  loc_og_t2f_mou_8          float64 
 27  loc_og_t2c_mou_6          float64 
 28  loc_og_t2c_mou_7          float64 
 29  loc_og_t2c_mou_8          float64 
 30  loc_og_mou_6              float64 
 31  loc_og_mou_7              float64 
 32  loc_og_mou_8              float64 
 33  std_og_t2t_mou_6          float64 
 34  std_og_t2t_mou_7          float64 
 35  std_og_t2t_mou_8          float64 
 36  std_og_t2m_mou_6          float64 
 37  std_og_t2m_mou_7          float64 
 38  std_og_t2m_mou_8          float64 
 39  std_og_t2f_mou_6          float64 
 40  std_og_t2f_mou_7          float64 
 41  std_og_t2f_mou_8          float64 
 42  std_og_t2c_mou_6          float64 
 43  std_og_t2c_mou_7          float64 
 44  std_og_t2c_mou_8          float64 
 45  std_og_mou_6              float64 
 46  std_og_mou_7              float64 
 47  std_og_mou_8              float64 
 48  isd_og_mou_6              float64 
 49  isd_og_mou_7              float64 
 50  isd_og_mou_8              float64 
 51  spl_og_mou_6              float64 
 52  spl_og_mou_7              float64 
 53  spl_og_mou_8              float64 
 54  og_others_6               float64 
 55  og_others_7               float64 
 56  og_others_8               float64 
 57  total_og_mou_6            float64 
 58  total_og_mou_7            float64 
 59  total_og_mou_8            float64 
 60  loc_ic_t2t_mou_6          float64 
 61  loc_ic_t2t_mou_7          float64 
 62  loc_ic_t2t_mou_8          float64 
 63  loc_ic_t2m_mou_6          float64 
 64  loc_ic_t2m_mou_7          float64 
 65  loc_ic_t2m_mou_8          float64 
 66  loc_ic_t2f_mou_6          float64 
 67  loc_ic_t2f_mou_7          float64 
 68  loc_ic_t2f_mou_8          float64 
 69  loc_ic_mou_6              float64 
 70  loc_ic_mou_7              float64 
 71  loc_ic_mou_8              float64 
 72  std_ic_t2t_mou_6          float64 
 73  std_ic_t2t_mou_7          float64 
 74  std_ic_t2t_mou_8          float64 
 75  std_ic_t2m_mou_6          float64 
 76  std_ic_t2m_mou_7          float64 
 77  std_ic_t2m_mou_8          float64 
 78  std_ic_t2f_mou_6          float64 
 79  std_ic_t2f_mou_7          float64 
 80  std_ic_t2f_mou_8          float64 
 81  std_ic_t2o_mou_6          float64 
 82  std_ic_t2o_mou_7          float64 
 83  std_ic_t2o_mou_8          float64 
 84  std_ic_mou_6              float64 
 85  std_ic_mou_7              float64 
 86  std_ic_mou_8              float64 
 87  total_ic_mou_6            float64 
 88  total_ic_mou_7            float64 
 89  total_ic_mou_8            float64 
 90  spl_ic_mou_6              float64 
 91  spl_ic_mou_7              float64 
 92  spl_ic_mou_8              float64 
 93  isd_ic_mou_6              float64 
 94  isd_ic_mou_7              float64 
 95  isd_ic_mou_8              float64 
 96  ic_others_6               float64 
 97  ic_others_7               float64 
 98  ic_others_8               float64 
 99  total_rech_num_6          int64   
 100 total_rech_num_7          int64   
 101 total_rech_num_8          int64   
 102 total_rech_amt_6          int64   
 103 total_rech_amt_7          int64   
 104 total_rech_amt_8          int64   
 105 max_rech_amt_6            int64   
 106 max_rech_amt_7            int64   
 107 max_rech_amt_8            int64   
 108 date_of_last_rech_6       object  
 109 date_of_last_rech_7       object  
 110 date_of_last_rech_8       object  
 111 last_day_rch_amt_6        int64   
 112 last_day_rch_amt_7        int64   
 113 last_day_rch_amt_8        int64   
 114 date_of_last_rech_data_6  object  
 115 date_of_last_rech_data_7  object  
 116 date_of_last_rech_data_8  object  
 117 total_rech_data_6         float64 
 118 total_rech_data_7         float64 
 119 total_rech_data_8         float64 
 120 max_rech_data_6           float64 
 121 max_rech_data_7           float64 
 122 max_rech_data_8           float64 
 123 av_rech_amt_data_6        float64 
 124 av_rech_amt_data_7        float64 
 125 av_rech_amt_data_8        float64 
 126 vol_2g_mb_6               float64 
 127 vol_2g_mb_7               float64 
 128 vol_2g_mb_8               float64 
 129 vol_3g_mb_6               float64 
 130 vol_3g_mb_7               float64 
 131 vol_3g_mb_8               float64 
 132 night_pck_user_6          float64 
 133 night_pck_user_7          float64 
 134 night_pck_user_8          float64 
 135 monthly_2g_6              int64   
 136 monthly_2g_7              int64   
 137 monthly_2g_8              int64   
 138 sachet_2g_6               int64   
 139 sachet_2g_7               int64   
 140 sachet_2g_8               int64   
 141 monthly_3g_6              int64   
 142 monthly_3g_7              int64   
 143 monthly_3g_8              int64   
 144 sachet_3g_6               int64   
 145 sachet_3g_7               int64   
 146 sachet_3g_8               int64   
 147 fb_user_6                 float64 
 148 fb_user_7                 float64 
 149 fb_user_8                 float64 
 150 aon                       int64   
 151 aug_vbc_3g                float64 
 152 jul_vbc_3g                float64 
 153 jun_vbc_3g                float64 
 154 churn_tag                 category
dtypes: category(1), float64(120), int64(25), object(9)
memory usage: 35.3+ MB

The dataset is imbalanced , only 8% churn customers. We need class balancing.


In [0]:
#Neeed balancing by synthetic generation of new churn records

Y = churn_v1.pop("churn_tag")
X = churn_v1.select_dtypes(exclude=['object'])

In [494]:
Y.head()


Out[494]:
0    1
1    1
2    0
3    0
4    0
Name: churn_tag, dtype: category
Categories (2, int64): [0, 1]

In [0]:
# Test Train Split should happen before balancing should be applied only on train data
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X,Y, train_size=0.8,test_size=0.2,random_state=1)

Balancing the DataSet


In [496]:
#Before Class Balancing
Y_DF = pd.DataFrame(Y_train)

plotCategoricalVariables("churn_tag", Y_DF)



In [497]:
# ADASYN Balancing , ADASYN is more advanced than SMOTE
from imblearn.over_sampling import ADASYN 

adaSyn = ADASYN()
X_Train_Bal, Y_Train_Bal = adaSyn.fit_sample(X_train, Y_train)
print(X_train.shape)
print(X_Train_Bal.shape)


/usr/local/lib/python3.6/dist-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function safe_indexing is deprecated; safe_indexing is deprecated in version 0.22 and will be removed in version 0.24.
  warnings.warn(msg, category=FutureWarning)
(24000, 145)
(43722, 145)

In [498]:
Y_Train_Bal_DF = pd.DataFrame(Y_Train_Bal, columns = {"churn_tag"})
plotCategoricalVariables("churn_tag", Y_Train_Bal_DF)


AdaSyn Balanced Training Set

Standardization Of Data


In [0]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_Train_Bal_Standardized = scaler.fit_transform(X_Train_Bal)
X_Test_Standardized = scaler.transform(X_test)

Applying PCA


In [500]:
from sklearn.decomposition import PCA
pca = PCA(random_state=100)
pca.fit(X_Train_Bal_Standardized)


Out[500]:
PCA(copy=True, iterated_power='auto', n_components=None, random_state=100,
    svd_solver='auto', tol=0.0, whiten=False)

In [501]:
pca.components_


Out[501]:
array([[ 1.23124594e-01,  1.32918417e-01,  1.09281082e-01, ...,
        -7.06306859e-02, -8.16028378e-02, -7.90099206e-02],
       [ 6.49853219e-02,  9.65534940e-02,  1.42321643e-01, ...,
         1.23030389e-01,  1.13296255e-01,  9.77905591e-02],
       [ 9.29517373e-02,  1.47201067e-01,  1.57385030e-01, ...,
         1.10895432e-01,  1.10334179e-01,  9.24613249e-02],
       ...,
       [-0.00000000e+00,  1.04511239e-16,  1.04066389e-16, ...,
         2.51955741e-17,  4.26830765e-17, -4.21585639e-17],
       [-0.00000000e+00,  1.99423369e-17, -4.80545229e-17, ...,
        -1.11348533e-16, -1.21596755e-16, -1.41379300e-17],
       [-0.00000000e+00, -8.24241638e-17, -4.83002959e-17, ...,
         2.82336961e-17,  2.20256174e-17, -1.70258139e-16]])

In [502]:
pca.explained_variance_ratio_


Out[502]:
array([1.11096162e-01, 9.40039022e-02, 5.88367391e-02, 4.80784319e-02,
       3.87452684e-02, 3.29300221e-02, 2.98569103e-02, 2.80631279e-02,
       2.68648857e-02, 2.35067893e-02, 2.23296896e-02, 2.10839844e-02,
       1.98166379e-02, 1.86055813e-02, 1.78535514e-02, 1.70829830e-02,
       1.59485888e-02, 1.50291335e-02, 1.47079620e-02, 1.37674102e-02,
       1.35791504e-02, 1.28076827e-02, 1.26426809e-02, 1.04149132e-02,
       1.00899948e-02, 9.41386459e-03, 9.19761727e-03, 9.06832370e-03,
       8.96448940e-03, 8.52577628e-03, 8.13255866e-03, 7.50156760e-03,
       7.05254684e-03, 6.90174930e-03, 6.86674529e-03, 6.75575092e-03,
       6.49036909e-03, 6.48010113e-03, 6.14915986e-03, 6.10364541e-03,
       5.99738218e-03, 5.69626265e-03, 5.43753448e-03, 5.09305338e-03,
       4.87404622e-03, 4.80485145e-03, 4.36732961e-03, 4.24018836e-03,
       4.21708484e-03, 4.06962568e-03, 4.02498484e-03, 3.92082227e-03,
       3.85384595e-03, 3.74068801e-03, 3.64367497e-03, 3.53286264e-03,
       3.32835203e-03, 3.30233358e-03, 3.18500737e-03, 3.00272055e-03,
       2.93912289e-03, 2.89800809e-03, 2.76171892e-03, 2.69014252e-03,
       2.56382086e-03, 2.52873772e-03, 2.45887183e-03, 2.40748616e-03,
       2.39756450e-03, 2.35177049e-03, 2.28732412e-03, 2.25033015e-03,
       2.15664524e-03, 2.02567192e-03, 1.99955288e-03, 1.96574109e-03,
       1.90379946e-03, 1.87639250e-03, 1.78929013e-03, 1.73071952e-03,
       1.70095982e-03, 1.65556842e-03, 1.56082646e-03, 1.51594784e-03,
       1.45518837e-03, 1.44026567e-03, 1.43273624e-03, 1.40090210e-03,
       1.27679887e-03, 1.24597921e-03, 1.21420029e-03, 1.20326062e-03,
       9.22089073e-04, 9.07572968e-04, 8.78269841e-04, 8.50618984e-04,
       8.37832610e-04, 8.05568120e-04, 7.43992043e-04, 7.03474172e-04,
       6.86629004e-04, 6.67611184e-04, 6.01632307e-04, 4.74260300e-04,
       4.11107998e-04, 3.83107378e-04, 3.03208302e-04, 2.52920712e-04,
       2.23651974e-04, 1.66524114e-04, 1.59471719e-04, 1.26793562e-04,
       8.74889059e-05, 2.63965013e-05, 1.22953111e-05, 1.87290055e-06,
       6.87351616e-07, 4.78085835e-07, 9.38129408e-12, 4.28420430e-12,
       3.75471558e-12, 3.54353374e-12, 2.12613774e-12, 1.64922624e-12,
       1.33828093e-12, 1.08202370e-12, 9.57970097e-13, 8.06462007e-13,
       7.82566299e-13, 7.49389108e-13, 4.39265565e-13, 2.92777323e-13,
       2.27646777e-13, 1.48111297e-13, 1.10716690e-13, 9.57235434e-14,
       2.82184747e-31, 2.19098548e-32, 1.71077606e-33, 4.19989612e-34,
       4.19989612e-34, 4.19989612e-34, 4.19989612e-34, 4.19989612e-34,
       1.03105999e-34])

In [503]:
plt.bar(range(1,len(pca.explained_variance_ratio_)+1), pca.explained_variance_ratio_)


Out[503]:
<BarContainer object of 145 artists>

In [0]:
var_cumu = np.cumsum(pca.explained_variance_ratio_)

In [505]:
fig = plt.figure(figsize = (15,10))
plt.plot(range(1,len(var_cumu)+1), var_cumu)


Out[505]:
[<matplotlib.lines.Line2D at 0x7f6692498ef0>]

40 Features explain ~ 85 % variance and 60 features ~ 90 % , with 100 features variance explained is almost 100 %


In [0]:
# for  better model , we will take 40 features

In [508]:
pca40Components = PCA(n_components = 40, random_state=100)
X_Train_Bal_Standardized_PCA40 = pca40Components.fit_transform(X_Train_Bal_Standardized)
X_Test_Standardized_PCA40 = pca40Components.transform(X_Test_Standardized)
print(X_Train_Bal_Standardized_PCA40.shape)
print(X_Test_Standardized_PCA40.shape)


(43722, 40)
(6001, 40)

Now the Data is Balanced, Standardized and of Reduced Dimensionality

We need to apply

3 Models for good Prediction

1 Model for Interpretability

  1. Decision Tree

In [0]:
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

In [0]:
searchParamGrid = {
    'max_depth': range(5,15,3),
    'min_samples_leaf': range(50, 500, 50),
    'min_samples_split': range(100, 500, 100),
}

In [0]:
dtc=DecisionTreeClassifier(random_state=100)
grid_search = GridSearchCV(estimator = dtc, param_grid = searchParamGrid, 
                          cv = 5, verbose=2, n_jobs = 4)

In [530]:
grid_search.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal )


Fitting 5 folds for each of 144 candidates, totalling 720 fits
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-530-d03a25f39490> in <module>()
----> 1 grid_search.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal )

/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
    708                 return results
    709 
--> 710             self._run_search(evaluate_candidates)
    711 
    712         # For multi-metric evaluation, store the best_index_, best_params_ and

/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
   1149     def _run_search(self, evaluate_candidates):
   1150         """Search all candidates in param_grid"""
-> 1151         evaluate_candidates(ParameterGrid(self.param_grid))
   1152 
   1153 

/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params)
    687                                for parameters, (train, test)
    688                                in product(candidate_params,
--> 689                                           cv.split(X, y, groups)))
    690 
    691                 if len(out) < 1:

/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in __call__(self, iterable)
   1015 
   1016             with self._backend.retrieval_context():
-> 1017                 self.retrieve()
   1018             # Make sure that we get a last message telling us we are done
   1019             elapsed_time = time.time() - self._start_time

/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in retrieve(self)
    907             try:
    908                 if getattr(self._backend, 'supports_timeout', False):
--> 909                     self._output.extend(job.get(timeout=self.timeout))
    910                 else:
    911                     self._output.extend(job.get())

/usr/local/lib/python3.6/dist-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
    560         AsyncResults.get from multiprocessing."""
    561         try:
--> 562             return future.result(timeout=timeout)
    563         except LokyTimeoutError:
    564             raise TimeoutError()

/usr/lib/python3.6/concurrent/futures/_base.py in result(self, timeout)
    425                 return self.__get_result()
    426 
--> 427             self._condition.wait(timeout)
    428 
    429             if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:

/usr/lib/python3.6/threading.py in wait(self, timeout)
    293         try:    # restore state no matter what (e.g., KeyboardInterrupt)
    294             if timeout is None:
--> 295                 waiter.acquire()
    296                 gotit = True
    297             else:

KeyboardInterrupt: 

In [0]:
print(grid_search.best_score_)
print(grid_search.best_params_)

In [0]:
dtc_best = DecisionTreeClassifier(max_depth=11,
                             min_samples_leaf=50, 
                             min_samples_split=100,
                             random_state=100)

In [532]:
dtc_best.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal )


Out[532]:
DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=11, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=50, min_samples_split=100,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=100, splitter='best')

In [0]:
Y_TestPreds = dtc_best.predict(X_Test_Standardized_PCA40)

In [540]:
from sklearn.metrics import classification_report
print(classification_report(Y_test,Y_TestPreds))


              precision    recall  f1-score   support

           0       0.97      0.80      0.88      5499
           1       0.25      0.73      0.38       502

    accuracy                           0.80      6001
   macro avg       0.61      0.77      0.63      6001
weighted avg       0.91      0.80      0.84      6001

Recall is important as we want to identify all churn cases. Using Grid Search with Decision Tree we get Test Recall of 0.73

  1. Gradient Boost

In [0]:
GBC = GradientBoostingClassifier(random_state=100)

In [0]:
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
from xgboost import XGBClassifier
from xgboost import plot_importance

In [560]:
GBC.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal)


Out[560]:
GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=100, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [0]:
Y_TestPreds = GBC.predict(X_Test_Standardized_PCA40)

In [562]:
print(classification_report(Y_test,Y_TestPreds))


              precision    recall  f1-score   support

           0       0.98      0.83      0.90      5499
           1       0.30      0.80      0.44       502

    accuracy                           0.83      6001
   macro avg       0.64      0.81      0.67      6001
weighted avg       0.92      0.83      0.86      6001

Without Any Tuning Gradient Boost Got Recall of .80 on Churn Detection


In [0]:
# parameter grid
param_grid = {"learning_rate": [0.07],
              "subsample": [0.8],
              "n_estimators" : [150, 200],
              "max_depth" : [5,10]
             }

In [570]:
folds = 3
grid_search_GBC = GridSearchCV(GBC, 
                               cv = folds,
                               param_grid=param_grid, 
                               scoring = 'roc_auc', 
                               return_train_score=True,                         
                               verbose = 2,
                               n_jobs =5)

grid_search_GBC.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal)


Fitting 3 folds for each of 4 candidates, totalling 12 fits
[Parallel(n_jobs=5)]: Using backend LokyBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done  10 out of  12 | elapsed: 21.3min remaining:  4.3min
[Parallel(n_jobs=5)]: Done  12 out of  12 | elapsed: 24.3min finished
Out[570]:
GridSearchCV(cv=3, error_score=nan,
             estimator=GradientBoostingClassifier(ccp_alpha=0.0,
                                                  criterion='friedman_mse',
                                                  init=None, learning_rate=0.1,
                                                  loss='deviance', max_depth=3,
                                                  max_features=None,
                                                  max_leaf_nodes=None,
                                                  min_impurity_decrease=0.0,
                                                  min_impurity_split=None,
                                                  min_samples_leaf=1,
                                                  min_samples_split=2,
                                                  min_weight_fraction_leaf=0.0,
                                                  n_estimators=100,
                                                  n_iter_no_change=None,
                                                  presort='deprecated',
                                                  random_state=100,
                                                  subsample=1.0, tol=0.0001,
                                                  validation_fraction=0.1,
                                                  verbose=0, warm_start=False),
             iid='deprecated', n_jobs=5,
             param_grid={'learning_rate': [0.07], 'max_depth': [5, 10],
                         'n_estimators': [150, 200], 'subsample': [0.8]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
             scoring='roc_auc', verbose=2)

In [571]:
print(grid_search_GBC.best_score_)
print(grid_search_GBC.best_params_)


0.9673505982867697
{'learning_rate': 0.07, 'max_depth': 10, 'n_estimators': 200, 'subsample': 0.8}

In [575]:
GBC_best = GradientBoostingClassifier(learning_rate=0.07,
                             max_depth=10, 
                             n_estimators=200,
                             subsample =0.8,
                             random_state=100)
GBC_best.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal)


Out[575]:
GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.07, loss='deviance', max_depth=10,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=200,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=100, subsample=0.8, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [576]:
Y_TestPreds = GBC_best.predict(X_Test_Standardized_PCA40)
print(classification_report(Y_test,Y_TestPreds))


              precision    recall  f1-score   support

           0       0.96      0.93      0.95      5499
           1       0.44      0.63      0.52       502

    accuracy                           0.90      6001
   macro avg       0.70      0.78      0.73      6001
weighted avg       0.92      0.90      0.91      6001

  1. XGBoost

In [578]:
model = XGBClassifier()
model.fit(X_Train_Bal_Standardized_PCA40,Y_Train_Bal)


Out[578]:
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [579]:
Y_TestPreds = model.predict(X_Test_Standardized_PCA40)
print(classification_report(Y_test,Y_TestPreds))


              precision    recall  f1-score   support

           0       0.98      0.83      0.90      5499
           1       0.30      0.80      0.43       502

    accuracy                           0.83      6001
   macro avg       0.64      0.82      0.67      6001
weighted avg       0.92      0.83      0.86      6001

XG Boost also gives recall of 0.80 for Churn Prediction


In [0]:

Interpretability

We use the Logistic Regression here w/o PCA to determine insights.


In [0]:
import statsmodels.api as sm

In [588]:
logm1 = sm.GLM(Y_Train_Bal,(sm.add_constant(X_Train_Bal_Standardized)), family = sm.families.Binomial())
logm1.fit().summary()


Out[588]:
Generalized Linear Model Regression Results
Dep. Variable: y No. Observations: 43722
Model: GLM Df Residuals: 43585
Model Family: Binomial Df Model: 136
Link Function: logit Scale: 1.0000
Method: IRLS Log-Likelihood: -17553.
Date: Sat, 16 May 2020 Deviance: 35106.
Time: 20:44:23 Pearson chi2: 1.08e+05
No. Iterations: 100
Covariance Type: nonrobust
coef std err z P>|z| [0.025 0.975]
const -0.2527 0.016 -15.940 0.000 -0.284 -0.222
x1 0.0748 0.057 1.310 0.190 -0.037 0.187
x2 0.2797 0.065 4.303 0.000 0.152 0.407
x3 0.8895 0.075 11.842 0.000 0.742 1.037
x4 -2.6693 1.374 -1.942 0.052 -5.363 0.025
x5 -1.5668 1.224 -1.280 0.201 -3.967 0.833
x6 2.2196 1.026 2.163 0.031 0.208 4.231
x7 -1.9961 1.383 -1.443 0.149 -4.707 0.715
x8 -2.7738 1.204 -2.304 0.021 -5.134 -0.414
x9 2.0325 1.041 1.953 0.051 -0.007 4.072
x10 0.0589 0.025 2.317 0.021 0.009 0.109
x11 0.1112 0.032 3.510 0.000 0.049 0.173
x12 -0.0609 0.022 -2.714 0.007 -0.105 -0.017
x13 0.5029 0.347 1.448 0.148 -0.178 1.183
x14 0.6968 0.278 2.505 0.012 0.152 1.242
x15 -0.6279 0.311 -2.022 0.043 -1.237 -0.019
x16 2520.5958 562.421 4.482 0.000 1418.271 3622.921
x17 -909.7252 559.318 -1.626 0.104 -2005.968 186.517
x18 3852.5977 501.264 7.686 0.000 2870.139 4835.057
x19 2922.0808 652.099 4.481 0.000 1643.990 4200.172
x20 -959.1557 589.883 -1.626 0.104 -2115.305 196.993
x21 4209.1054 547.605 7.686 0.000 3135.819 5282.392
x22 313.3706 69.901 4.483 0.000 176.367 450.374
x23 -102.6249 63.006 -1.629 0.103 -226.115 20.865
x24 336.2401 43.750 7.686 0.000 250.493 421.988
x25 -0.0875 0.019 -4.564 0.000 -0.125 -0.050
x26 0.1101 0.024 4.610 0.000 0.063 0.157
x27 0.0103 0.018 0.562 0.574 -0.026 0.046
x28 -6901.1638 1206.684 -5.719 0.000 -9266.221 -4536.107
x29 -2240.1476 1190.990 -1.881 0.060 -4574.444 94.149
x30 -2065.4335 1122.143 -1.841 0.066 -4264.793 133.926
x31 2457.6706 1622.783 1.514 0.130 -722.926 5638.267
x32 -5026.4985 1611.261 -3.120 0.002 -8184.511 -1868.486
x33 5770.8891 1374.483 4.199 0.000 3076.953 8464.826
x34 2420.7257 1598.847 1.514 0.130 -712.956 5554.408
x35 -5078.6673 1628.369 -3.119 0.002 -8270.212 -1887.123
x36 5716.5212 1361.504 4.199 0.000 3048.022 8385.020
x37 52.3530 34.560 1.515 0.130 -15.384 120.090
x38 -117.3805 37.645 -3.118 0.002 -191.164 -43.597
x39 133.1149 31.748 4.193 0.000 70.890 195.339
x40 -6.696e-09 2.37e-08 -0.282 0.778 -5.32e-08 3.98e-08
x41 7.794e-08 4.21e-08 1.850 0.064 -4.62e-09 1.6e-07
x42 6.606e-08 1.72e-08 3.830 0.000 3.23e-08 9.99e-08
x43 -8723.6759 2771.497 -3.148 0.002 -1.42e+04 -3291.643
x44 -219.4057 2856.539 -0.077 0.939 -5818.119 5379.308
x45 -490.2995 2461.996 -0.199 0.842 -5315.723 4335.124
x46 -197.2020 62.057 -3.178 0.001 -318.831 -75.573
x47 -299.3703 69.835 -4.287 0.000 -436.244 -162.496
x48 300.0779 53.643 5.594 0.000 194.940 405.216
x49 -122.8299 38.693 -3.174 0.002 -198.667 -46.993
x50 -227.6985 53.052 -4.292 0.000 -331.679 -123.718
x51 255.2508 45.610 5.596 0.000 165.857 344.645
x52 -15.4076 4.831 -3.189 0.001 -24.877 -5.939
x53 -56.2123 13.109 -4.288 0.000 -81.906 -30.519
x54 75.7086 13.525 5.598 0.000 49.200 102.217
x55 5583.6201 1756.852 3.178 0.001 2140.254 9026.986
x56 8377.7982 1951.902 4.292 0.000 4552.141 1.22e+04
x57 -9505.2792 1697.893 -5.598 0.000 -1.28e+04 -6177.471
x58 -328.2410 378.530 -0.867 0.386 -1070.147 413.665
x59 5101.1106 367.740 13.872 0.000 4380.353 5821.868
x60 5151.9386 339.358 15.181 0.000 4486.809 5817.068
x61 -461.0601 531.722 -0.867 0.386 -1503.217 581.097
x62 7310.8646 527.003 13.873 0.000 6277.958 8343.771
x63 7172.8250 472.507 15.180 0.000 6246.729 8098.921
x64 -84.9977 97.984 -0.867 0.386 -277.044 107.048
x65 1529.2090 110.241 13.872 0.000 1313.141 1745.277
x66 1408.9462 92.800 15.183 0.000 1227.062 1590.831
x67 -3821.1257 1009.410 -3.786 0.000 -5799.533 -1842.718
x68 -8427.9077 985.835 -8.549 0.000 -1.04e+04 -6495.707
x69 3069.1028 938.098 3.272 0.001 1230.465 4907.740
x70 -1336.1406 242.635 -5.507 0.000 -1811.697 -860.584
x71 -326.1085 237.298 -1.374 0.169 -791.204 138.987
x72 465.4869 204.558 2.276 0.023 64.560 866.414
x73 -1680.9542 305.245 -5.507 0.000 -2279.223 -1082.685
x74 -471.7712 343.109 -1.375 0.169 -1144.253 200.711
x75 737.0093 323.657 2.277 0.023 102.654 1371.365
x76 -286.7015 52.062 -5.507 0.000 -388.742 -184.661
x77 -70.0710 50.961 -1.375 0.169 -169.953 29.811
x78 121.2475 53.275 2.276 0.023 16.830 225.665
x79 6.224e-08 4.53e-08 1.375 0.169 -2.65e-08 1.51e-07
x80 -1.336e-07 2.74e-08 -4.877 0.000 -1.87e-07 -7.99e-08
x81 2.733e-08 2.14e-08 1.277 0.202 -1.46e-08 6.93e-08
x82 441.6462 502.479 0.879 0.379 -543.195 1426.487
x83 1581.4866 551.160 2.869 0.004 501.232 2661.741
x84 5039.0494 508.206 9.915 0.000 4042.984 6035.115
x85 5169.6516 848.769 6.091 0.000 3506.095 6833.208
x86 -2376.9208 886.317 -2.682 0.007 -4114.070 -639.772
x87 -1.604e+04 840.162 -19.087 0.000 -1.77e+04 -1.44e+04
x88 -2.4761 0.427 -5.795 0.000 -3.314 -1.639
x89 0.9658 0.399 2.418 0.016 0.183 1.749
x90 5.6240 0.309 18.202 0.000 5.018 6.230
x91 -953.2086 156.511 -6.090 0.000 -1259.965 -646.453
x92 464.1058 173.060 2.682 0.007 124.914 803.297
x93 2899.7774 151.924 19.087 0.000 2602.011 3197.544
x94 -177.7868 29.178 -6.093 0.000 -234.975 -120.599
x95 90.0054 33.571 2.681 0.007 24.207 155.803
x96 542.2828 28.409 19.089 0.000 486.603 597.963
x97 -0.0747 0.028 -2.636 0.008 -0.130 -0.019
x98 0.3021 0.035 8.616 0.000 0.233 0.371
x99 -0.3927 0.033 -12.031 0.000 -0.457 -0.329
x100 0.1258 0.063 2.010 0.044 0.003 0.249
x101 -0.4331 0.068 -6.356 0.000 -0.567 -0.300
x102 -0.6658 0.078 -8.572 0.000 -0.818 -0.514
x103 -0.1992 0.030 -6.748 0.000 -0.257 -0.141
x104 0.1457 0.029 5.096 0.000 0.090 0.202
x105 0.3939 0.032 12.226 0.000 0.331 0.457
x106 -0.0325 0.022 -1.509 0.131 -0.075 0.010
x107 -0.0373 0.020 -1.904 0.057 -0.076 0.001
x108 -0.4868 0.023 -20.813 0.000 -0.533 -0.441
x109 0.1232 0.015 7.963 0.000 0.093 0.154
x110 0.0758 0.019 4.071 0.000 0.039 0.112
x111 -0.2312 0.021 -11.207 0.000 -0.272 -0.191
x112 0.2923 0.039 7.560 0.000 0.217 0.368
x113 0.1737 0.042 4.168 0.000 0.092 0.255
x114 -0.2218 0.046 -4.820 0.000 -0.312 -0.132
x115 -0.2589 0.047 -5.540 0.000 -0.350 -0.167
x116 -0.1820 0.050 -3.662 0.000 -0.279 -0.085
x117 0.0053 0.062 0.086 0.931 -0.115 0.126
x118 0.0053 0.021 0.256 0.798 -0.036 0.046
x119 0.1221 0.023 5.207 0.000 0.076 0.168
x120 -0.2759 0.028 -10.028 0.000 -0.330 -0.222
x121 0.0103 0.033 0.314 0.754 -0.054 0.075
x122 0.0874 0.036 2.432 0.015 0.017 0.158
x123 -0.0860 0.037 -2.298 0.022 -0.159 -0.013
x124 -0.1631 0.056 -2.914 0.004 -0.273 -0.053
x125 -0.3148 0.060 -5.258 0.000 -0.432 -0.197
x126 0.3975 0.058 6.838 0.000 0.284 0.511
x127 -0.0979 0.021 -4.674 0.000 -0.139 -0.057
x128 -0.1241 0.022 -5.590 0.000 -0.168 -0.081
x129 -0.1275 0.025 -5.105 0.000 -0.176 -0.079
x130 0.1119 0.016 7.203 0.000 0.081 0.142
x131 0.0306 0.018 1.676 0.094 -0.005 0.066
x132 -0.1871 0.020 -9.430 0.000 -0.226 -0.148
x133 0.0905 0.031 2.961 0.003 0.031 0.150
x134 0.0272 0.032 0.860 0.390 -0.035 0.089
x135 -0.1365 0.037 -3.653 0.000 -0.210 -0.063
x136 0.0574 0.021 2.688 0.007 0.016 0.099
x137 0.1527 0.028 5.417 0.000 0.097 0.208
x138 -0.0847 0.027 -3.086 0.002 -0.138 -0.031
x139 0.0874 0.062 1.405 0.160 -0.035 0.209
x140 0.3018 0.067 4.538 0.000 0.171 0.432
x141 -0.6006 0.066 -9.153 0.000 -0.729 -0.472
x142 -0.1328 0.016 -8.474 0.000 -0.164 -0.102
x143 -0.1630 0.028 -5.919 0.000 -0.217 -0.109
x144 0.0805 0.028 2.918 0.004 0.026 0.135
x145 -0.0253 0.024 -1.058 0.290 -0.072 0.022

In [0]:
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(max_iter=500)

In [0]:
from sklearn.feature_selection import RFE
rfe = RFE(logreg, 40)             
rfe = rfe.fit(X_Train_Bal_Standardized, Y_Train_Bal)

In [597]:
rfe.support_


Out[597]:
array([False,  True,  True, False,  True, False, False,  True, False,
       False, False, False, False,  True, False, False, False,  True,
       False, False,  True,  True,  True, False, False, False, False,
       False, False,  True, False,  True, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True,  True, False, False,  True,  True,  True,  True,
       False, False, False, False, False,  True, False,  True,  True,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False,  True,
       False, False, False, False, False, False, False,  True,  True,
       False, False,  True, False, False,  True, False, False,  True,
        True,  True,  True, False,  True, False, False,  True, False,
       False, False,  True, False, False, False, False,  True,  True,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False,  True,  True,  True,  True, False,
       False])

In [0]:
Importance = list(zip(X_train.columns, rfe.support_, rfe.ranking_))

In [0]:
col = X_train.columns[rfe.support_]

In [0]:
Y_Pred = rfe.predict(X_Test_Standardized)

In [608]:
print(classification_report(Y_test,Y_TestPreds))


              precision    recall  f1-score   support

           0       0.98      0.83      0.90      5499
           1       0.30      0.80      0.43       502

    accuracy                           0.83      6001
   macro avg       0.64      0.82      0.67      6001
weighted avg       0.92      0.83      0.86      6001


In [623]:
print(Importance)


[('arpu_6', False, 21), ('arpu_7', True, 1), ('arpu_8', True, 1), ('onnet_mou_6', False, 8), ('onnet_mou_7', True, 1), ('onnet_mou_8', False, 68), ('offnet_mou_6', False, 70), ('offnet_mou_7', True, 1), ('offnet_mou_8', False, 95), ('roam_ic_mou_6', False, 52), ('roam_ic_mou_7', False, 6), ('roam_ic_mou_8', False, 69), ('roam_og_mou_6', False, 82), ('roam_og_mou_7', True, 1), ('roam_og_mou_8', False, 90), ('loc_og_t2t_mou_6', False, 10), ('loc_og_t2t_mou_7', False, 67), ('loc_og_t2t_mou_8', True, 1), ('loc_og_t2m_mou_6', False, 40), ('loc_og_t2m_mou_7', False, 17), ('loc_og_t2m_mou_8', True, 1), ('loc_og_t2f_mou_6', True, 1), ('loc_og_t2f_mou_7', True, 1), ('loc_og_t2f_mou_8', False, 29), ('loc_og_t2c_mou_6', False, 33), ('loc_og_t2c_mou_7', False, 32), ('loc_og_t2c_mou_8', False, 100), ('loc_og_mou_6', False, 81), ('loc_og_mou_7', False, 45), ('loc_og_mou_8', True, 1), ('std_og_t2t_mou_6', False, 44), ('std_og_t2t_mou_7', True, 1), ('std_og_t2t_mou_8', False, 49), ('std_og_t2m_mou_6', False, 9), ('std_og_t2m_mou_7', False, 28), ('std_og_t2m_mou_8', False, 57), ('std_og_t2f_mou_6', False, 83), ('std_og_t2f_mou_7', False, 46), ('std_og_t2f_mou_8', True, 1), ('std_og_t2c_mou_6', False, 106), ('std_og_t2c_mou_7', False, 101), ('std_og_t2c_mou_8', False, 102), ('std_og_mou_6', False, 7), ('std_og_mou_7', False, 18), ('std_og_mou_8', False, 16), ('isd_og_mou_6', False, 63), ('isd_og_mou_7', False, 19), ('isd_og_mou_8', False, 22), ('spl_og_mou_6', False, 35), ('spl_og_mou_7', False, 34), ('spl_og_mou_8', False, 85), ('og_others_6', False, 71), ('og_others_7', False, 61), ('og_others_8', False, 79), ('total_og_mou_6', False, 20), ('total_og_mou_7', True, 1), ('total_og_mou_8', True, 1), ('loc_ic_t2t_mou_6', False, 2), ('loc_ic_t2t_mou_7', False, 80), ('loc_ic_t2t_mou_8', True, 1), ('loc_ic_t2m_mou_6', True, 1), ('loc_ic_t2m_mou_7', True, 1), ('loc_ic_t2m_mou_8', True, 1), ('loc_ic_t2f_mou_6', False, 97), ('loc_ic_t2f_mou_7', False, 84), ('loc_ic_t2f_mou_8', False, 73), ('loc_ic_mou_6', False, 48), ('loc_ic_mou_7', False, 60), ('loc_ic_mou_8', True, 1), ('std_ic_t2t_mou_6', False, 47), ('std_ic_t2t_mou_7', True, 1), ('std_ic_t2t_mou_8', True, 1), ('std_ic_t2m_mou_6', False, 59), ('std_ic_t2m_mou_7', False, 58), ('std_ic_t2m_mou_8', False, 65), ('std_ic_t2f_mou_6', False, 98), ('std_ic_t2f_mou_7', False, 87), ('std_ic_t2f_mou_8', False, 41), ('std_ic_t2o_mou_6', False, 104), ('std_ic_t2o_mou_7', False, 105), ('std_ic_t2o_mou_8', False, 103), ('std_ic_mou_6', False, 96), ('std_ic_mou_7', False, 89), ('std_ic_mou_8', True, 1), ('total_ic_mou_6', False, 94), ('total_ic_mou_7', False, 86), ('total_ic_mou_8', False, 30), ('spl_ic_mou_6', False, 43), ('spl_ic_mou_7', False, 42), ('spl_ic_mou_8', True, 1), ('isd_ic_mou_6', False, 78), ('isd_ic_mou_7', False, 88), ('isd_ic_mou_8', False, 53), ('ic_others_6', False, 54), ('ic_others_7', False, 74), ('ic_others_8', False, 66), ('total_rech_num_6', False, 62), ('total_rech_num_7', True, 1), ('total_rech_num_8', True, 1), ('total_rech_amt_6', False, 50), ('total_rech_amt_7', False, 15), ('total_rech_amt_8', True, 1), ('max_rech_amt_6', False, 24), ('max_rech_amt_7', False, 23), ('max_rech_amt_8', True, 1), ('last_day_rch_amt_6', False, 72), ('last_day_rch_amt_7', False, 51), ('last_day_rch_amt_8', True, 1), ('total_rech_data_6', True, 1), ('total_rech_data_7', True, 1), ('total_rech_data_8', True, 1), ('max_rech_data_6', False, 5), ('max_rech_data_7', True, 1), ('max_rech_data_8', False, 4), ('av_rech_amt_data_6', False, 12), ('av_rech_amt_data_7', True, 1), ('av_rech_amt_data_8', False, 93), ('vol_2g_mb_6', False, 99), ('vol_2g_mb_7', False, 14), ('vol_2g_mb_8', True, 1), ('vol_3g_mb_6', False, 77), ('vol_3g_mb_7', False, 64), ('vol_3g_mb_8', False, 75), ('night_pck_user_6', False, 25), ('night_pck_user_7', True, 1), ('night_pck_user_8', True, 1), ('monthly_2g_6', False, 3), ('monthly_2g_7', False, 13), ('monthly_2g_8', False, 31), ('sachet_2g_6', False, 55), ('sachet_2g_7', False, 91), ('sachet_2g_8', False, 38), ('monthly_3g_6', False, 36), ('monthly_3g_7', False, 92), ('monthly_3g_8', False, 37), ('sachet_3g_6', False, 56), ('sachet_3g_7', False, 11), ('sachet_3g_8', False, 39), ('fb_user_6', False, 26), ('fb_user_7', True, 1), ('fb_user_8', True, 1), ('aon', True, 1), ('aug_vbc_3g', True, 1), ('jul_vbc_3g', False, 27), ('jun_vbc_3g', False, 76)]

In [624]:
def sortingVal(row):
  return row[2]

Importance.sort( key=sortingVal)

print (Importance)


[('arpu_7', True, 1), ('arpu_8', True, 1), ('onnet_mou_7', True, 1), ('offnet_mou_7', True, 1), ('roam_og_mou_7', True, 1), ('loc_og_t2t_mou_8', True, 1), ('loc_og_t2m_mou_8', True, 1), ('loc_og_t2f_mou_6', True, 1), ('loc_og_t2f_mou_7', True, 1), ('loc_og_mou_8', True, 1), ('std_og_t2t_mou_7', True, 1), ('std_og_t2f_mou_8', True, 1), ('total_og_mou_7', True, 1), ('total_og_mou_8', True, 1), ('loc_ic_t2t_mou_8', True, 1), ('loc_ic_t2m_mou_6', True, 1), ('loc_ic_t2m_mou_7', True, 1), ('loc_ic_t2m_mou_8', True, 1), ('loc_ic_mou_8', True, 1), ('std_ic_t2t_mou_7', True, 1), ('std_ic_t2t_mou_8', True, 1), ('std_ic_mou_8', True, 1), ('spl_ic_mou_8', True, 1), ('total_rech_num_7', True, 1), ('total_rech_num_8', True, 1), ('total_rech_amt_8', True, 1), ('max_rech_amt_8', True, 1), ('last_day_rch_amt_8', True, 1), ('total_rech_data_6', True, 1), ('total_rech_data_7', True, 1), ('total_rech_data_8', True, 1), ('max_rech_data_7', True, 1), ('av_rech_amt_data_7', True, 1), ('vol_2g_mb_8', True, 1), ('night_pck_user_7', True, 1), ('night_pck_user_8', True, 1), ('fb_user_7', True, 1), ('fb_user_8', True, 1), ('aon', True, 1), ('aug_vbc_3g', True, 1), ('loc_ic_t2t_mou_6', False, 2), ('monthly_2g_6', False, 3), ('max_rech_data_8', False, 4), ('max_rech_data_6', False, 5), ('roam_ic_mou_7', False, 6), ('std_og_mou_6', False, 7), ('onnet_mou_6', False, 8), ('std_og_t2m_mou_6', False, 9), ('loc_og_t2t_mou_6', False, 10), ('sachet_3g_7', False, 11), ('av_rech_amt_data_6', False, 12), ('monthly_2g_7', False, 13), ('vol_2g_mb_7', False, 14), ('total_rech_amt_7', False, 15), ('std_og_mou_8', False, 16), ('loc_og_t2m_mou_7', False, 17), ('std_og_mou_7', False, 18), ('isd_og_mou_7', False, 19), ('total_og_mou_6', False, 20), ('arpu_6', False, 21), ('isd_og_mou_8', False, 22), ('max_rech_amt_7', False, 23), ('max_rech_amt_6', False, 24), ('night_pck_user_6', False, 25), ('fb_user_6', False, 26), ('jul_vbc_3g', False, 27), ('std_og_t2m_mou_7', False, 28), ('loc_og_t2f_mou_8', False, 29), ('total_ic_mou_8', False, 30), ('monthly_2g_8', False, 31), ('loc_og_t2c_mou_7', False, 32), ('loc_og_t2c_mou_6', False, 33), ('spl_og_mou_7', False, 34), ('spl_og_mou_6', False, 35), ('monthly_3g_6', False, 36), ('monthly_3g_8', False, 37), ('sachet_2g_8', False, 38), ('sachet_3g_8', False, 39), ('loc_og_t2m_mou_6', False, 40), ('std_ic_t2f_mou_8', False, 41), ('spl_ic_mou_7', False, 42), ('spl_ic_mou_6', False, 43), ('std_og_t2t_mou_6', False, 44), ('loc_og_mou_7', False, 45), ('std_og_t2f_mou_7', False, 46), ('std_ic_t2t_mou_6', False, 47), ('loc_ic_mou_6', False, 48), ('std_og_t2t_mou_8', False, 49), ('total_rech_amt_6', False, 50), ('last_day_rch_amt_7', False, 51), ('roam_ic_mou_6', False, 52), ('isd_ic_mou_8', False, 53), ('ic_others_6', False, 54), ('sachet_2g_6', False, 55), ('sachet_3g_6', False, 56), ('std_og_t2m_mou_8', False, 57), ('std_ic_t2m_mou_7', False, 58), ('std_ic_t2m_mou_6', False, 59), ('loc_ic_mou_7', False, 60), ('og_others_7', False, 61), ('total_rech_num_6', False, 62), ('isd_og_mou_6', False, 63), ('vol_3g_mb_7', False, 64), ('std_ic_t2m_mou_8', False, 65), ('ic_others_8', False, 66), ('loc_og_t2t_mou_7', False, 67), ('onnet_mou_8', False, 68), ('roam_ic_mou_8', False, 69), ('offnet_mou_6', False, 70), ('og_others_6', False, 71), ('last_day_rch_amt_6', False, 72), ('loc_ic_t2f_mou_8', False, 73), ('ic_others_7', False, 74), ('vol_3g_mb_8', False, 75), ('jun_vbc_3g', False, 76), ('vol_3g_mb_6', False, 77), ('isd_ic_mou_6', False, 78), ('og_others_8', False, 79), ('loc_ic_t2t_mou_7', False, 80), ('loc_og_mou_6', False, 81), ('roam_og_mou_6', False, 82), ('std_og_t2f_mou_6', False, 83), ('loc_ic_t2f_mou_7', False, 84), ('spl_og_mou_8', False, 85), ('total_ic_mou_7', False, 86), ('std_ic_t2f_mou_7', False, 87), ('isd_ic_mou_7', False, 88), ('std_ic_mou_7', False, 89), ('roam_og_mou_8', False, 90), ('sachet_2g_7', False, 91), ('monthly_3g_7', False, 92), ('av_rech_amt_data_8', False, 93), ('total_ic_mou_6', False, 94), ('offnet_mou_8', False, 95), ('std_ic_mou_6', False, 96), ('loc_ic_t2f_mou_6', False, 97), ('std_ic_t2f_mou_6', False, 98), ('vol_2g_mb_6', False, 99), ('loc_og_t2c_mou_8', False, 100), ('std_og_t2c_mou_7', False, 101), ('std_og_t2c_mou_8', False, 102), ('std_ic_t2o_mou_8', False, 103), ('std_ic_t2o_mou_6', False, 104), ('std_ic_t2o_mou_7', False, 105), ('std_og_t2c_mou_6', False, 106)]

In [632]:
# Top 10 Predictors for Churn
for item in Importance[:10]:
    print(item[0])


arpu_7
arpu_8
onnet_mou_7
offnet_mou_7
roam_og_mou_7
loc_og_t2t_mou_8
loc_og_t2m_mou_8
loc_og_t2f_mou_6
loc_og_t2f_mou_7
loc_og_mou_8

In [0]: