In [16]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import scipy
# Render matplotlib figures inline, directly below the cell that draws them.
%matplotlib inline
# Apply matplotlib's 'ggplot' style sheet to the figures drawn in this notebook.
plt.style.use('ggplot')

In [17]:
# Load the heights/weights/genders data set into `df`.
# The location is built from the current user's home directory rather than a
# hard-coded '/Users/<name>' prefix (the original literal also began with a
# doubled slash), so the cell is no longer tied to a single account.
# DATA_DIR is the one place to edit if the course folder lives elsewhere.
DATA_DIR = Path.home() / 'Desktop' / 'Lede-Program' / 'algorithms' / 'class4' / 'homework' / 'data'
df = pd.read_csv(DATA_DIR / 'heights_weights_genders.csv')

In [18]:
# Preview the first five rows of the loaded table.
df.head(n=5)


Out[18]:
Gender Height Weight
0 Male 73.847017 241.893563
1 Male 68.781904 162.310473
2 Male 74.110105 212.740856
3 Male 71.730978 220.042470
4 Male 69.881796 206.349801

In [27]:
# Height vs. weight, one scatter panel per gender.
# Axis sharing is requested when the figure is created so both panels use the
# same x and y ranges and can be compared directly. Passing sharex/sharey to
# DataFrame.plot together with ax= only alters tick-label visibility; it does
# not tie the limits of already-existing axes together.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharex=True, sharey=True)

# Same plot for each gender; the panel title doubles as the legend.
for ax, gender in ((ax1, 'Male'), (ax2, 'Female')):
    df[df['Gender'] == gender].plot(x='Height', y='Weight', kind='scatter', legend=False, ax=ax)
    ax.set_title(gender)

plt.tight_layout()



In [29]:
# Correlation matrix of height and weight for the male rows.
# The numeric columns are selected explicitly: Gender holds strings, and
# DataFrame.corr() in pandas 2.x raises on non-numeric columns instead of
# silently dropping them.
df.loc[df['Gender'] == 'Male', ['Height', 'Weight']].corr()


Out[29]:
Height Weight
Height 1.000000 0.862979
Weight 0.862979 1.000000

In [30]:
# Correlation matrix of height and weight for the female rows.
# The numeric columns are selected explicitly: Gender holds strings, and
# DataFrame.corr() in pandas 2.x raises on non-numeric columns instead of
# silently dropping them.
df.loc[df['Gender'] == 'Female', ['Height', 'Weight']].corr()


Out[30]:
Height Weight
Height 1.000000 0.849609
Weight 0.849609 1.000000

In [ ]: