In [11]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# Load the height/weight/gender dataset from a CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="heights_weights_genders.csv")

In [4]:
# Preview the first five rows to check the column names and value formats.
df.head(n=5)


Out[4]:
Gender Height Weight
0 Male 73.847017 241.893563
1 Male 68.781904 162.310473
2 Male 74.110105 212.740856
3 Male 71.730978 220.042470
4 Male 69.881796 206.349801

In [5]:
# Scatter plot of Weight against Height for every row in the dataset.
df.plot.scatter(x="Height", y="Weight")


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x10aedf710>

In [24]:
# Pairwise correlation matrix of the two numeric columns.
# The columns are selected explicitly because "Gender" holds strings:
# pandas >= 2.0 no longer drops non-numeric columns in DataFrame.corr()
# by default and raises instead. The explicit selection works on old and
# new pandas alike and yields the same 2x2 Height/Weight matrix.
df[["Height", "Weight"]].corr()


Out[24]:
Height Weight
Height 1.000000 0.924756
Weight 0.924756 1.000000

In [23]:
fig, ax = plt.subplots(figsize=(10, 5))
# Colours are handed out in plotting order, i.e. the order in which
# groupby yields the gender groups below.
ax.set_prop_cycle('color', ['LIGHTPINK', 'LIGHTSKYBLUE'])

# Draw each gender as its own marker-only series so it gets a separate legend entry.
for gender, selection in df.groupby("Gender"):
    ax.plot(selection['Height'], selection["Weight"], label=gender, marker='o', linestyle="", markeredgewidth=0)

# Label the axes and title the figure so it can be read on its own.
ax.set(xlabel="Height", ylabel="Weight", title="Weight vs. Height by Gender")
ax.legend(loc='upper left')


Out[23]:
<matplotlib.legend.Legend at 0x10b9f7c18>

In [25]:
# Male correlation
# Select the male rows and only the two numeric columns in a single .loc call:
# leaving the string "Gender" column in the frame makes DataFrame.corr() raise
# on pandas >= 2.0. The scalar is then read from the matrix with one .loc lookup
# instead of chained indexing.
df.loc[df["Gender"] == "Male", ["Height", "Weight"]].corr().loc["Height", "Weight"]


Out[25]:
0.86297884861631291

In [27]:
# Female correlation
# Select the female rows and only the two numeric columns in a single .loc call:
# leaving the string "Gender" column in the frame makes DataFrame.corr() raise
# on pandas >= 2.0. The scalar is then read from the matrix with one .loc lookup
# instead of chained indexing.
df.loc[df["Gender"] == "Female", ["Height", "Weight"]].corr().loc["Height", "Weight"]


Out[27]:
0.8496085914186009

In [ ]: