In [11]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
In [2]:
# Load the dataset into `df`; the relative path is resolved against the
# kernel's current working directory. Later cells read the Height, Weight
# and Gender columns of this frame.
df = pd.read_csv("heights_weights_genders.csv")
In [4]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)
Out[4]:
In [5]:
# Scatter of Weight against Height for every row, both genders pooled.
df.plot.scatter(x='Height', y='Weight')
Out[5]:
In [24]:
# Pairwise Pearson correlation between the numeric columns.
# `df` also holds the string-valued Gender column; recent pandas no longer
# drops non-numeric columns silently in corr() and raises instead, so the
# numeric columns are selected explicitly before correlating.
df.select_dtypes(include='number').corr()
Out[24]:
In [23]:
# Height vs. Weight scatter with one colour per Gender group.
fig, ax = plt.subplots(figsize=(10,5))
# groupby yields its keys in sorted order, and each ax.plot call below takes
# the next colour from this cycle in that same order.
ax.set_prop_cycle('color',['LIGHTPINK','LIGHTSKYBLUE'])
for gender, selection in df.groupby("Gender"):
    # linestyle="" draws markers only (no connecting line between rows);
    # markeredgewidth=0 removes the marker outline.
    ax.plot(selection['Height'], selection["Weight"], label=gender,marker='o',linestyle="", markeredgewidth=0)
# Label the axes so the figure can be read on its own.
ax.set_xlabel('Height')
ax.set_ylabel('Weight')
ax.legend(loc='upper left')
Out[23]:
In [25]:
# Height/Weight Pearson correlation for the rows where Gender == "Male".
# Only the two numeric columns are kept before calling corr(): the Gender
# column holds strings, which recent pandas refuses to correlate.
# A single .loc lookup replaces the chained ['Height']['Weight'] indexing;
# the correlation matrix is symmetric, so the value is the same.
df.loc[df['Gender'] == "Male", ['Height', 'Weight']].corr().loc['Height', 'Weight']
Out[25]:
In [27]:
# Height/Weight Pearson correlation for the rows where Gender == 'Female'.
# Only the two numeric columns are kept before calling corr(): the Gender
# column holds strings, which recent pandas refuses to correlate.
# A single .loc lookup replaces the chained ['Height']['Weight'] indexing;
# the correlation matrix is symmetric, so the value is the same.
df.loc[df['Gender'] == 'Female', ['Height', 'Weight']].corr().loc['Height', 'Weight']
Out[27]:
In [ ]: