Using heights_weights_genders.csv, analyze how the height-weight correlation differs between women and men.
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
# Load the height/weight/gender table into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; confirm whether a
# path relative to the notebook should be used instead.
DATA_PATH = '/home/sean/git/algorithms/class4/homework/data/heights_weights_genders.csv'
df = pd.read_csv(DATA_PATH)
In [5]:
# Descriptive statistics computed separately for each value of the Gender column.
by_gender = df.groupby('Gender')
by_gender.describe()
Out[5]:
In [6]:
# Keep only the rows whose Gender is 'Male'.
male_df = df.loc[df['Gender'].eq('Male')]
In [7]:
# Keep only the rows whose Gender is 'Female'.
female_df = df.loc[df['Gender'].eq('Female')]
In [30]:
ax.scatter?
In [45]:
# Scatter weight against height for each gender and overlay a degree-1
# least-squares fit per group. np.polyfit returns coefficients with the highest
# power first, so index 0 is the slope and index 1 is the intercept.
fig, ax = plt.subplots()
fitm = np.polyfit(male_df['Height'], male_df['Weight'], deg=1)
fitf = np.polyfit(female_df['Height'], female_df['Weight'], deg=1)
# Each fitted line must use its own group's slope and intercept.
ax.plot(female_df['Height'], fitf[0] * female_df['Height'] + fitf[1], color='magenta', alpha=0.5, label='Female fit')
ax.scatter(female_df['Height'], female_df['Weight'], alpha=0.1, c='pink', linewidths=0)
ax.plot(male_df['Height'], fitm[0] * male_df['Height'] + fitm[1], color='cyan', alpha=0.5, label='Male fit')
ax.scatter(male_df['Height'], male_df['Weight'], alpha=0.1, c='blue', linewidths=0)
# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set(xlabel='Height', ylabel='Weight', title='Weight vs. height by gender')
ax.legend()
Out[45]:
In [36]:
# Correlation matrix over the numeric columns of the full data set.
# Gender holds strings, and recent pandas versions raise in corr() instead of
# silently dropping non-numeric columns, so select the numeric ones explicitly.
df.select_dtypes(include='number').corr()
Out[36]:
In [37]:
# Correlation matrix over the numeric columns for the male subset.
# Gender holds strings, and recent pandas versions raise in corr() instead of
# silently dropping non-numeric columns, so select the numeric ones explicitly.
male_df.select_dtypes(include='number').corr()
Out[37]:
In [38]:
# Correlation matrix over the numeric columns for the female subset.
# Gender holds strings, and recent pandas versions raise in corr() instead of
# silently dropping non-numeric columns, so select the numeric ones explicitly.
female_df.select_dtypes(include='number').corr()
Out[38]:
In [ ]: