In [10]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
# Load the height / weight / gender dataset from the local data directory.
df = pd.read_csv(filepath_or_buffer='data/heights_weights_genders.csv')
In [4]:
# Preview the first rows. `head` must be called: printing the bare attribute
# shows the bound-method repr rather than the leading rows.
# Leaving it as the last expression lets the notebook render the frame richly.
df.head()
In [5]:
# Pairwise correlation over the numeric columns of the full dataset.
# The frame also holds the string-valued Gender column; newer pandas no longer
# drops non-numeric columns inside corr() and raises instead, so select the
# numeric columns explicitly (this form works on old and new pandas alike).
df.select_dtypes(include='number').corr()
Out[5]:
In [6]:
# Split the dataset into one frame per gender using boolean masks on the Gender column.
Male = df.loc[df['Gender'].eq('Male')]
Female = df.loc[df['Gender'].eq('Female')]
In [7]:
# Correlation within the male subset only.
# Restrict to numeric columns first: the subset still carries the string
# Gender column, which newer pandas refuses to correlate.
Male.select_dtypes(include='number').corr()
Out[7]:
In [8]:
# Correlation within the female subset only.
# Restrict to numeric columns first: the subset still carries the string
# Gender column, which newer pandas refuses to correlate.
Female.select_dtypes(include='number').corr()
Out[8]:
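Findings: For males there was a slightly higher positive correlation between height and weight (86.29%) than for females (84.96%). However, I am unclear why, prior to separating by gender, the correlation was 92%, which is higher than that of either group individually. A likely explanation is that pooling the two groups adds between-group variation along the same trend (if one group is on average both taller and heavier than the other), which widens the range of both variables and raises the overall correlation.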
In [19]:
# Scatter of weight against height for the male subset.
# A low alpha makes individual points faint so overlapping regions stay readable.
ax = Male.plot.scatter(x='Height', y='Weight', alpha=0.1)
ax.set_xlabel('Height (In.)')
ax.set_title('Male Height Vs. Weight ')
ax.set_ylabel('Weight (Lbs.)')
Out[19]:
In [20]:
# Scatter of weight against height for the female subset.
# A low alpha makes individual points faint so overlapping regions stay readable.
ax = Female.plot.scatter(x='Height', y='Weight', alpha=0.1)
ax.set_xlabel('Height (In.)')
ax.set_title('Female Height Vs. Weight ')
ax.set_ylabel('Weight (Lbs.)')
Out[20]:
In [ ]:
In [ ]: