In [1]:
import pandas as pd
import matplotlib
import numpy
%matplotlib inline
In [2]:
# Load the height/weight/gender dataset; the relative path is resolved
# against the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="heights_weights_genders.csv")
In [12]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)
Out[12]:
In [20]:
# Pearson correlation matrix of the remaining columns, computed separately
# for each value of 'Gender'.
by_gender = df.groupby(by='Gender')
by_gender.corr(method='pearson')
Out[20]:
In [19]:
# Draw one Height-vs-Weight scatter plot per gender. Each figure is titled with
# its group key; without a title the separate figures are indistinguishable.
# Looping (instead of calling .plot on the groupby) also avoids echoing the
# Series-of-Axes repr as cell output.
for gender, group in df.groupby('Gender'):
    group.plot(kind='scatter', x='Height', y='Weight', title=str(gender))
Out[19]:
For both genders, there is a strong positive correlation between height and weight, with a slightly higher correlation for men (r = 0.862979) than for women (r = 0.849609).