In [19]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
In [3]:
# Load the CSV (path is relative to the notebook's working directory) into a DataFrame.
csv_path = 'data/heights_weights_genders.csv'
df = pd.read_csv(csv_path)
# Preview the first five rows as a quick sanity check of the load.
df.head(n=5)
Out[3]:
In [6]:
# Calculate the correlation matrix between height and weight for the men.
# Only the two numeric columns are selected: 'Gender' holds strings, and
# DataFrame.corr() raises on non-numeric columns in pandas >= 2.0 (older
# versions silently dropped them), so being explicit works everywhere.
df.loc[df['Gender'] == 'Male', ['Height', 'Weight']].corr()
Out[6]:
In [7]:
# Calculate the correlation matrix between height and weight for the women.
# Only the two numeric columns are selected: 'Gender' holds strings, and
# DataFrame.corr() raises on non-numeric columns in pandas >= 2.0 (older
# versions silently dropped them), so being explicit works everywhere.
df.loc[df['Gender'] == 'Female', ['Height', 'Weight']].corr()
Out[7]:
In [36]:
# Calculate the difference between the male and female Height/Weight
# correlation matrices (male minus female).
# The numeric columns are selected explicitly because DataFrame.corr()
# raises on the string-valued 'Gender' column in pandas >= 2.0.
male_corr = df.loc[df['Gender'] == 'Male', ['Height', 'Weight']].corr()
female_corr = df.loc[df['Gender'] == 'Female', ['Height', 'Weight']].corr()
# The diagonal is 0 by construction; the off-diagonal entry is the
# difference between the two Height-Weight correlations.
male_corr - female_corr
Out[36]:
In [33]:
# Overview: overlay the male and female height/weight scatter plots on one
# shared set of axes.
fig, ax = plt.subplots(figsize=(10, 7))
# Each series gets a label so the legend explains the colours; alpha keeps
# the series drawn second from completely hiding overlapping points.
df[df['Gender'] == 'Male'].plot(kind='scatter', x='Height', y='Weight', color='darkblue', alpha=0.5, label='Male', ax=ax)
df[df['Gender'] == 'Female'].plot(kind='scatter', x='Height', y='Weight', color='red', alpha=0.5, label='Female', ax=ax)
ax.set_title('Height vs. weight by gender')
# Trailing semicolon suppresses the Legend repr in the cell output.
ax.legend();
Out[33]: