In [46]:
import pandas as pd
import numpy as np
In [5]:
# Read the telecom churn CSV (path is relative to the notebook's directory).
data = pd.read_csv(filepath_or_buffer='../data/telecom_churn.csv')
In [7]:
# Preview the first five rows of the frame.
data.head(n=5)
Out[7]:
In [10]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
data.shape
Out[10]:
In [14]:
# Print the column labels of the frame.
print(data.columns)
In [16]:
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() only appends a stray "None" line.
data.info()
In [23]:
# Store the Churn column as 64-bit integers.
data['Churn'] = data['Churn'].astype(np.int64)
In [24]:
# Summary statistics of the numeric columns. Left as the cell's last
# expression so the notebook renders it as a table instead of plain text.
data.describe()
In [22]:
# Summary statistics of the bool/object columns. Left as the cell's last
# expression so the notebook renders it as a table instead of plain text.
data.describe(include=['bool', 'object'])
In [33]:
# Number of rows for each distinct Churn value.
data['Churn'].value_counts()
Out[33]:
In [35]:
# Share of rows per area code (fractions rather than raw counts).
data['Area code'].value_counts(normalize=True)
Out[35]:
In [37]:
# First five rows after ordering by "Total day charge", highest first.
(
    data
    .sort_values(by="Total day charge", ascending=False)
    .head()
)
Out[37]:
In [38]:
# Mean of the Churn column; if the column holds only 0/1 this is the
# fraction of rows with Churn == 1 (assumption — TODO confirm values).
data['Churn'].mean()
Out[38]:
In [40]:
# Per-column means for churned (Churn == 1) and retained (Churn == 0) rows.
# numeric_only=True is required: the frame also holds string columns
# (e.g. 'International plan'), and pandas >= 2.0 raises TypeError when asked
# to average them instead of silently dropping them.
data[data['Churn'] == 1].mean(numeric_only=True), \
data[data['Churn'] == 0].mean(numeric_only=True)
Out[40]:
In [42]:
# Average "Total day minutes" over the rows where Churn == 1.
data.loc[data['Churn'] == 1, 'Total day minutes'].mean()
Out[42]:
In [44]:
# Largest "Total intl minutes" among rows with Churn == 0 and
# 'International plan' equal to 'No'.
data.loc[
    (data['Churn'] == 0) & (data['International plan'] == 'No'),
    'Total intl minutes',
].max()
Out[44]:
In [47]:
# Apply np.max to every column (axis=0 is the column-wise default).
data.apply(np.max, axis=0)
Out[47]:
In [50]:
# Minute columns to summarise; `columns` is reused by a later cell.
columns = [
    'Total day minutes',
    'Total night minutes',
    'Total intl minutes',
]
# Descriptive statistics of those columns, split by Churn value.
data.groupby(['Churn'])[columns].describe()
Out[50]:
In [52]:
# Mean / std / min / max of the selected minute columns per Churn value.
# String names are used instead of NumPy callables: pandas deprecates
# passing np.mean/np.std/... to agg, the strings pin pandas' own
# implementations (sample std, ddof=1), and the result columns are labelled
# 'min'/'max' rather than NumPy's internal 'amin'/'amax'.
data.groupby(['Churn'])[columns].agg(['mean', 'std', 'min', 'max'])
Out[52]:
In [53]:
# Contingency table: Churn (rows) against 'International plan' (columns).
pd.crosstab(index=data['Churn'], columns=data['International plan'])
Out[53]:
In [55]:
# Contingency table of Churn against 'Voice mail plan', expressed as
# fractions of all rows.
pd.crosstab(
    index=data['Churn'],
    columns=data['Voice mail plan'],
    normalize=True,
)
Out[55]:
In [59]:
# Sum and mean of day / evening / night call counts per area code.
# The original listed 'Total day calls' twice; the second entry is replaced
# with 'Total eve calls' so all three periods are covered.
data.pivot_table(
    values=['Total day calls', 'Total eve calls', 'Total night calls'],
    index=['Area code'],
    aggfunc='sum',
), \
data.pivot_table(
    values=['Total day calls', 'Total eve calls', 'Total night calls'],
    index=['Area code'],
    aggfunc='mean',
)
Out[59]:
In [61]:
# Derived column: calls summed over the day, night, evening and
# international periods.
data['Total calls'] = (
    data['Total day calls']
    + data['Total night calls']
    + data['Total eve calls']
    + data['Total intl calls']
)
data.head()
Out[61]:
In [62]:
# Derived column: charges summed over the day, night, evening and
# international periods.
data['Total charge'] = (
    data['Total day charge']
    + data['Total night charge']
    + data['Total eve charge']
    + data['Total intl charge']
)
data.head()
Out[62]:
In [68]:
# Demonstrates removing a column: add a throwaway constant column, then
# drop it again. The original called data.head() between the two steps, but
# only a cell's last expression is displayed, so that call did nothing.
data['Column for delete'] = 1
data = data.drop(columns=['Column for delete'])
data.head()
Out[68]:
In [75]:
# Churn against 'International plan' as percentages of all rows, with
# row/column totals.
pd.crosstab(
    index=data['Churn'],
    columns=data['International plan'],
    margins=True,
    normalize=True,
) * 100
Out[75]:
In [74]:
# Churn against 'Customer service calls' as percentages of all rows, with
# row/column totals.
pd.crosstab(
    index=data['Churn'],
    columns=data['Customer service calls'],
    margins=True,
    normalize=True,
) * 100
Out[74]:
In [81]:
# Distribution (in percent) of 'Customer service calls' restricted to rows
# with Churn == 0. crosstab aligns the two Series on their shared index, so
# only the filtered rows are counted.
pd.crosstab(
    data.loc[data['Churn'] == 0, 'Churn'],
    data['Customer service calls'],
    margins=True,
    normalize=True,
) * 100
Out[81]:
In [80]:
# Distribution (in percent) of 'Customer service calls' restricted to rows
# with Churn == 1. crosstab aligns the two Series on their shared index, so
# only the filtered rows are counted.
pd.crosstab(
    data.loc[data['Churn'] == 1, 'Churn'],
    data['Customer service calls'],
    margins=True,
    normalize=True,
) * 100
Out[80]:
In [95]:
# Binary flag: 1 when the row has more than 3 customer service calls, else 0.
data['Many_service_calls'] = data['Customer service calls'].gt(3).astype('int')
data.head()
Out[95]:
In [96]:
# Counts of Many_service_calls (rows) against Churn (columns), with totals.
pd.crosstab(
    index=data['Many_service_calls'],
    columns=data['Churn'],
    margins=True,
)
Out[96]:
In [99]:
# Churn counts for rows that both have many service calls AND an
# international plan. 'International plan' holds strings (it is compared
# against 'No' elsewhere in this notebook), so it cannot be `&`-ed with the
# integer flag directly: current pandas raises TypeError, and older versions
# treated every non-empty string as True, silently ignoring the plan.
# Both sides are therefore turned into explicit boolean masks first.
# NOTE(review): assumes the affirmative label is exactly 'Yes' — confirm
# against the CSV.
pd.crosstab(
    (data['Many_service_calls'] == 1) & (data['International plan'] == 'Yes'),
    data['Churn'],
)
Out[99]: