Assignment 3

Using the heights_weights_genders.csv dataset, analyze how the correlation between height and weight differs between women and men.


In [19]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline

In [3]:
# Load the height/weight/gender dataset from the local data directory,
# then preview the first five rows to confirm the columns parsed as expected.
df = pd.read_csv('data/heights_weights_genders.csv')
df.head(n=5)


Out[3]:
Gender Height Weight
0 Male 73.847017 241.893563
1 Male 68.781904 162.310473
2 Male 74.110105 212.740856
3 Male 71.730978 220.042470
4 Male 69.881796 206.349801

In [6]:
# Calculate the correlation between height and weight for the men.
# Select the two numeric columns explicitly: the frame still carries the
# string column 'Gender', and DataFrame.corr() in pandas >= 2.0 no longer
# drops non-numeric columns silently (it raises ValueError instead).
df.loc[df['Gender'] == 'Male', ['Height', 'Weight']].corr()


Out[6]:
Height Weight
Height 1.000000 0.862979
Weight 0.862979 1.000000

In [7]:
# Calculate the correlation between height and weight for the women.
# Select the two numeric columns explicitly: the frame still carries the
# string column 'Gender', and DataFrame.corr() in pandas >= 2.0 no longer
# drops non-numeric columns silently (it raises ValueError instead).
df.loc[df['Gender'] == 'Female', ['Height', 'Weight']].corr()


Out[7]:
Height Weight
Height 1.000000 0.849609
Weight 0.849609 1.000000

In [36]:
# Calculate the difference between the male and female correlation of Height and Weight.
# Restrict each subset to the numeric columns before calling corr(): the
# string column 'Gender' makes DataFrame.corr() raise in pandas >= 2.0.
numeric_cols = ['Height', 'Weight']
male_corr = df.loc[df['Gender'] == 'Male', numeric_cols].corr()
female_corr = df.loc[df['Gender'] == 'Female', numeric_cols].corr()
# Element-wise difference; the diagonal is 0 because both diagonals are 1.
male_corr - female_corr


Out[36]:
Height Weight
Height 0.00000 0.01337
Weight 0.01337 0.00000

In [33]:
# Get an overview by plotting both genders on one shared set of axes.
fig, ax = plt.subplots(figsize=(10, 7))
ax.set_title('Height vs. weight by gender')
# Label each series so the legend explains the colours, and use partial
# transparency so the second series does not hide the first where they overlap.
df[df['Gender'] == 'Male'].plot(kind='scatter', x='Height', y='Weight', color='darkblue', alpha=0.5, label='Male', ax=ax)
df[df['Gender'] == 'Female'].plot(kind='scatter', x='Height', y='Weight', color='red', alpha=0.5, label='Female', ax=ax)


Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f8354bcc9e8>