In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline
In [2]:
# Load the telecom churn table from the repository's data folder.
csv_path = '../data/telecom_churn.csv'
df = pd.read_csv(csv_path)
In [3]:
# Show the first five rows as a quick sanity check of the loaded columns.
df.head(n=5)
Out[3]:
In [4]:
# Histogram of the 'Total day minutes' column.
day_minutes = df['Total day minutes']
day_minutes.hist();
In [5]:
# Box plot of 'Total day minutes'.
# The series is passed as `x=` explicitly: seaborn >= 0.12 interprets the first
# positional argument as `data`, which would change the plot.
sns.boxplot(x=df['Total day minutes']);
In [6]:
# Draw one histogram per numeric column of df in a single figure grid.
df.hist();
In [7]:
# Row counts per state, most frequent first; show the top five.
state_counts = df['State'].value_counts()
state_counts.head()
Out[7]:
In [8]:
# Number of rows for each value of the Churn column.
churn_flags = df['Churn']
churn_flags.value_counts()
Out[8]:
In [9]:
# Bar chart of how many rows fall into each Churn value.
# Uses the explicit x=/data= form (as the later count plots in this notebook do)
# because seaborn >= 0.12 no longer accepts the plotted variable positionally.
sns.countplot(x='Churn', data=df);
In [10]:
# Bar chart of row counts for every state.
# Uses the explicit x=/data= form because seaborn >= 0.12 no longer accepts the
# plotted variable positionally.
sns.countplot(x='State', data=df);
In [11]:
# Bar chart restricted to the five most frequent states.
# The top-state index is named instead of being built inside a backslash-continued
# chained lookup, and the variable is passed as x=/data= because seaborn >= 0.12
# no longer accepts it positionally.
top_states = df['State'].value_counts().head().index
sns.countplot(x='State', data=df[df['State'].isin(top_states)]);
In [12]:
# Collect every column whose name contains 'charge' and plot their histograms.
feat = list(filter(lambda column_name: 'charge' in column_name, df.columns))
df.loc[:, feat].hist();
In [13]:
# Pairwise scatter plots (with per-column distributions) of the charge columns.
charge_frame = df[feat]
sns.pairplot(data=charge_frame);
In [14]:
# Preview of translating the boolean Churn flag into a plotting colour.
churn_palette = {False: 'blue', True: 'orange'}
df['Churn'].map(churn_palette).head()
Out[14]:
In [15]:
# First rows of the customers whose Churn flag is False.
stayed = ~df['Churn']
df[stayed].head()
Out[15]:
In [16]:
# Evening charge vs. international charge, one scatter layer per churn group.
# Churned customers are drawn first (orange), then the remaining ones (blue).
charge_groups = (
    (df[df['Churn']], 'orange', 'churn'),
    (df[~df['Churn']], 'blue', 'loyal'),
)
for group_rows, group_colour, group_label in charge_groups:
    plt.scatter(group_rows['Total eve charge'],
                group_rows['Total intl charge'],
                color=group_colour, label=group_label);
plt.xlabel('Вечерние начисления');
plt.ylabel('Межнар. начисления');
plt.title('Распределение начислений для лояльных/ушедших');
plt.legend();
In [17]:
# Heat map of pairwise correlations between the numeric/boolean columns.
# df still holds string columns (e.g. 'State'), and pandas >= 2.0 raises when
# corr() meets them, so the numeric and bool columns are selected explicitly.
# This form also works on older pandas versions.
numeric_columns = df.select_dtypes(include=['number', 'bool'])
sns.heatmap(numeric_columns.corr());
In [18]:
# Remove the charge columns collected in `feat` from df.
# Rebinding with errors='ignore' (instead of inplace=True) keeps the cell
# idempotent: running it a second time no longer raises KeyError for columns
# that are already gone.
df = df.drop(columns=feat, errors='ignore')
In [19]:
# Correlation heat map of the columns currently in df.
# Numeric and bool columns are selected explicitly because pandas >= 2.0 raises
# when corr() encounters the string columns still present in df.
numeric_columns = df.select_dtypes(include=['number', 'bool'])
sns.heatmap(numeric_columns.corr());
In [20]:
# Daytime minutes split by churn status, one box per Churn value.
sns.boxplot(data=df, x='Churn', y='Total day minutes');
In [21]:
# Daytime minutes split by state, one box per state.
sns.boxplot(data=df, x='State', y='Total day minutes');
In [22]:
# Violin plot of daytime minutes for each Churn value.
sns.violinplot(data=df, x='Churn', y='Total day minutes');
In [23]:
# Mean daytime minutes for each value of 'International plan'.
minutes_by_plan = df.groupby('International plan')['Total day minutes']
minutes_by_plan.mean()
Out[23]:
In [24]:
# Daytime minutes split by international-plan membership.
sns.boxplot(data=df, x='International plan', y='Total day minutes');
In [25]:
# Contingency table: Churn values as rows, 'International plan' values as columns.
pd.crosstab(index=df['Churn'], columns=df['International plan'])
Out[25]:
In [26]:
# Row counts per 'International plan' value, with bars split by Churn.
sns.countplot(data=df, x='International plan', hue='Churn');
In [27]:
# Row counts per number of customer-service calls, with bars split by Churn.
sns.countplot(data=df, x='Customer service calls', hue='Churn');
In [28]:
from sklearn.manifold import TSNE
In [29]:
# Create the t-SNE estimator with a fixed seed so repeated runs use the same
# random initialisation.
TSNE_SEED = 0
tsne = TSNE(random_state=TSNE_SEED)
In [30]:
# Feature table for t-SNE: a copy of df without the 'State' and 'Churn' columns.
df2 = df.drop(columns=['State', 'Churn'])
In [31]:
# Encode the two Yes/No plan columns as 1/0.
# Series.map turns any value that is not a key of the mapping into NaN.
yes_no_to_int = {'Yes': 1, 'No': 0}
for plan_column in ('International plan', 'Voice mail plan'):
    df2[plan_column] = df2[plan_column].map(yes_no_to_int)
In [32]:
# Print df2's column dtypes and non-null counts.
# NOTE(review): presumably a check that every column is numeric and complete
# before the table is passed to t-SNE — confirm.
df2.info()
In [33]:
%%time
# Fit t-SNE on the encoded feature table; the resulting coordinates are read
# from tsne.embedding_ by the plotting cell. %%time reports how long the fit takes.
# NOTE(review): df2 is passed unscaled, so columns with large magnitudes may
# dominate the distances — consider standardising first; confirm intent.
tsne.fit(df2)
Out[33]:
In [34]:
# Plot the t-SNE embedding, one scatter layer per churn group.
# The mask is cast to bool so that a 0/1-encoded Churn column would still select
# rows instead of being interpreted as integer positions.
# Labels and a legend are added so the colours can be read, matching the
# 'churn'/'loyal' labels of the charge scatter plot earlier in the notebook.
churn_mask = df['Churn'].values.astype(bool)
plt.scatter(tsne.embedding_[churn_mask, 0],
            tsne.embedding_[churn_mask, 1],
            color='orange', alpha=.7, label='churn');
plt.scatter(tsne.embedding_[~churn_mask, 0],
            tsne.embedding_[~churn_mask, 1],
            color='blue', alpha=.7, label='loyal');
plt.legend();