Correlation of height and weight in Men & Women

Using heights_weights_genders.csv, analyze how the correlation between height and weight differs between women and men.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Apply matplotlib's built-in 'ggplot' style sheet to every figure drawn afterwards.
plt.style.use('ggplot')

In [3]:
# Load the height / weight / gender measurements into a DataFrame.
df = pd.read_csv(filepath_or_buffer="data/heights_weights_genders.csv")

In [4]:
# Count the non-null values in each remaining column, per gender.
df.groupby(by="Gender").count()


Out[4]:
Height Weight
Gender
Female 5000 5000
Male 5000 5000

In [5]:
# Scatter plot of height against weight for the whole data set.
# A title is set so this figure can be told apart from the per-gender plots.
df.plot(kind='scatter', y='Height', x='Weight',
        title='Height vs. weight: all rows')


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x106964f60>

In [6]:
# Scatter plot of height against weight, restricted to the 'Male' rows.
# A title is set so this figure can be told apart from the other scatter plots.
df[df['Gender'] == 'Male'].plot(kind='scatter', y='Height', x='Weight',
                                title='Height vs. weight: Male')


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x1069e0ac8>

In [7]:
# Scatter plot of height against weight, restricted to the 'Female' rows.
# A title is set so this figure can be told apart from the other scatter plots.
df[df['Gender'] == 'Female'].plot(kind='scatter', y='Height', x='Weight',
                                  title='Height vs. weight: Female')


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x107b11cf8>

In [8]:
# Correlation matrix of height and weight for the 'Female' rows.
# The numeric columns are selected explicitly: pandas >= 2.0 no longer drops
# the string 'Gender' column silently, so a bare .corr() raises on this frame.
df.loc[df['Gender'] == 'Female', ['Height', 'Weight']].corr()


Out[8]:
Height Weight
Height 1.000000 0.849609
Weight 0.849609 1.000000

In [9]:
# Correlation matrix of height and weight for the 'Male' rows.
# The numeric columns are selected explicitly: pandas >= 2.0 no longer drops
# the string 'Gender' column silently, so a bare .corr() raises on this frame.
df.loc[df['Gender'] == 'Male', ['Height', 'Weight']].corr()


Out[9]:
Height Weight
Height 1.000000 0.862979
Weight 0.862979 1.000000

In [10]:
# First quartile (25th percentile) of height and weight for the 'Female' rows.
# The numeric columns are selected explicitly: pandas >= 2.0 no longer drops
# the string 'Gender' column automatically and raises on it instead.
df.loc[df['Gender'] == 'Female', ['Height', 'Weight']].quantile(.25)


Out[10]:
Height     61.894441
Weight    122.934096
dtype: float64

In [11]:
# Median (50th percentile) of height and weight for the 'Female' rows.
# The numeric columns are selected explicitly: pandas >= 2.0 no longer drops
# the string 'Gender' column automatically and raises on it instead.
df.loc[df['Gender'] == 'Female', ['Height', 'Weight']].quantile(.5)


Out[11]:
Height     63.730924
Weight    136.117583
dtype: float64

In [12]:
# Third quartile (75th percentile) of height and weight for the 'Female' rows.
# The numeric columns are selected explicitly: pandas >= 2.0 no longer drops
# the string 'Gender' column automatically and raises on it instead.
df.loc[df['Gender'] == 'Female', ['Height', 'Weight']].quantile(.75)


Out[12]:
Height     65.563565
Weight    148.810926
dtype: float64

In [14]:
# Mean height and weight for the 'Female' rows.
# The numeric columns are selected explicitly: pandas >= 2.0 no longer drops
# the string 'Gender' column automatically and raises on it instead.
df.loc[df['Gender'] == 'Female', ['Height', 'Weight']].mean()


Out[14]:
Height     63.708774
Weight    135.860093
dtype: float64

In [ ]:
# Work out the standard deviation of height and weight for women and for men

In [15]:
# Standard deviation of height and weight for the 'Female' rows.
# The numeric columns are selected explicitly: pandas >= 2.0 no longer drops
# the string 'Gender' column automatically and raises on it instead.
df.loc[df['Gender'] == 'Female', ['Height', 'Weight']].std()


Out[15]:
Height     2.696284
Weight    19.022468
dtype: float64

In [16]:
# Standard deviation of height and weight for the 'Male' rows.
# The numeric columns are selected explicitly: pandas >= 2.0 no longer drops
# the string 'Gender' column automatically and raises on it instead.
df.loc[df['Gender'] == 'Male', ['Height', 'Weight']].std()


Out[16]:
Height     2.863362
Weight    19.781155
dtype: float64

In [ ]: