In [15]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read the heights/weights/genders CSV into a DataFrame, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer='data/heights_weights_genders.csv')
df.head(n=5)


Out[2]:
Gender Height Weight
0 Male 73.847017 241.893563
1 Male 68.781904 162.310473
2 Male 74.110105 212.740856
3 Male 71.730978 220.042470
4 Male 69.881796 206.349801

In [16]:
# Scatter plot of Weight against Height, with one series per Gender value.
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(10,5))

# Plot each gender as its own marker-only series so it gets a distinct
# colour and its own legend entry (label is the group key).
for category, group in df.groupby('Gender'):
    ax.plot(
        group['Height'],
        group['Weight'],
        marker='o',
        linestyle='',
        label=category,
        markersize=10,
        markeredgewidth=0,
        alpha=.5,
    )

# Build the legend once, after every series has been added, instead of
# rebuilding it on each loop iteration.
ax.legend(loc='upper left')

# Label the figure so it can be read on its own; the trailing semicolon
# suppresses the text repr of the last call in the cell output.
ax.set_xlabel('Height')
ax.set_ylabel('Weight')
ax.set_title('Weight vs. Height by Gender');



In [13]:
# Correlation (pandas default method: Pearson) between Height and Weight for the 'Male' rows.
# Only the two numeric columns are selected before corr(): with the string 'Gender'
# column still present, DataFrame.corr() raises on pandas >= 2.0, where numeric_only
# defaults to False. A single .loc lookup replaces the chained [..][..] indexing.
df.loc[df['Gender'] == 'Male', ['Height', 'Weight']].corr().loc['Height', 'Weight']


Out[13]:
0.86297884861631291

In [14]:
# Correlation (pandas default method: Pearson) between Height and Weight for the 'Female' rows.
# Only the two numeric columns are selected before corr(): with the string 'Gender'
# column still present, DataFrame.corr() raises on pandas >= 2.0, where numeric_only
# defaults to False. A single .loc lookup replaces the chained [..][..] indexing.
df.loc[df['Gender'] == 'Female', ['Height', 'Weight']].corr().loc['Height', 'Weight']


Out[14]:
0.8496085914186009

In [ ]: