In [2]:
# import libraries
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np; import pandas as pd

In [3]:
# Load the insurance dataset from the CSV next to this notebook; every later
# cell reads from this `claims` frame.
claims = pd.read_csv(filepath_or_buffer="insurance.csv")

In [4]:
# Size of the loaded frame as a (rows, columns) tuple.
claims.shape


Out[4]:
(1338, 7)

In [5]:
# Column labels of the claims frame; these names are used as x / y / hue in the plots below.
claims.columns


Out[5]:
Index(['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges'], dtype='object')

In [6]:
# Peek at the first five records to sanity-check the parsed values.
claims.head(n=5)


Out[6]:
age sex bmi children smoker region charges
0 19 female 27.900 0 yes southwest 16884.92400
1 18 male 33.770 1 no southeast 1725.55230
2 28 male 33.000 3 no southeast 4449.46200
3 33 male 22.705 0 no northwest 21984.47061
4 32 male 28.880 0 no northwest 3866.85520

In [7]:
# Distribution of billed charges: density-normalised histogram with a KDE overlay.
#
# `sns.distplot` has been deprecated since seaborn 0.11 and is slated for removal,
# so use `histplot` whenever the installed seaborn provides it and only fall back
# to `distplot` on older installs. NOTE: the two functions pick default bin widths
# differently, so bar edges may shift slightly between seaborn versions.
if hasattr(sns, "histplot"):
    ax = sns.histplot(claims["charges"], kde=True, stat="density")
else:
    ax = sns.distplot(claims["charges"])

# Label the figure so it stands on its own when the notebook is skimmed.
ax.set_title("Distribution of insurance charges")
ax.set_ylabel("Density")

# Keep the Axes as the cell's final expression, as in the original cell.
ax


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdb58d30fd0>

In [8]:
# Charges compared across the two `sex` groups, one violin per group.
sns.violinplot(data=claims, x="sex", y="charges")


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdb58c4b550>

In [9]:
# Same comparison, with each `sex` group further broken down by `smoker`.
sns.violinplot(data=claims, x="sex", y="charges", hue="smoker")


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdb533a1ac8>

In [10]:
# As above, but draw quartile lines inside each violin instead of the default
# inner annotation.
sns.violinplot(
    data=claims,
    x="sex",
    y="charges",
    hue="smoker",
    inner="quartile",
)


Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdb533a1780>

In [11]:
# Distinct values present in the `children` column, in order of first appearance.
claims["children"].unique()


Out[11]:
array([0, 1, 3, 2, 5, 4])

In [12]:
# Charges by number of children, split by `smoker`, with quartile lines drawn
# inside each violin.
sns.violinplot(
    data=claims,
    x="children",
    y="charges",
    hue="smoker",
    inner="quartile",
)


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdb5312b2b0>

In [16]:
# Pairwise grid coloured by `smoker`: histograms on the diagonal, scatter
# plots on every off-diagonal panel.
g = sns.PairGrid(data=claims, hue="smoker")
g.map_diag(plt.hist)        # per-variable distributions
g.map_offdiag(plt.scatter)  # variable-vs-variable relationships
# The legend must be added after the mapping calls; the trailing `;` hides the repr.
g.add_legend();



In [18]:
# Pairwise grid coloured by `sex`: histograms on the diagonal, scatter plots
# on every off-diagonal panel.
g = sns.PairGrid(data=claims, hue="sex")
g.map_diag(plt.hist)        # per-variable distributions
g.map_offdiag(plt.scatter)  # variable-vs-variable relationships
# The legend must be added after the mapping calls; the trailing `;` hides the repr.
g.add_legend();



In [20]:
# One-call pair plot coloured by `sex`, with KDE curves on the diagonal.
g = sns.pairplot(data=claims, hue="sex", diag_kind="kde")



In [21]:
# One-call pair plot coloured by `smoker`, with KDE curves on the diagonal.
g = sns.pairplot(data=claims, hue="smoker", diag_kind="kde")



In [ ]: