notebook.community

Edit and run



In [11]:

    
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt



In [2]:

    
# Load the dataset (columns used below: Gender, Height, Weight) into a DataFrame.
df = pd.read_csv(filepath_or_buffer="heights_weights_genders.csv")



In [4]:

    
# Preview the first five rows to sanity-check the load.
df.head(n=5)



In [5]:

    
# Scatter of Weight against Height for the whole dataset (all genders together).
df.plot(
    x='Height',
    y='Weight',
    kind='scatter',
)









    Out[5]:





<matplotlib.axes._subplots.AxesSubplot at 0x10aedf710>



In [24]:

    
# Pairwise Pearson correlation of the numeric columns only. The string-valued
# 'Gender' column is excluded explicitly: pandas >= 2.0 no longer drops
# non-numeric columns silently and raises on a bare df.corr().
df.select_dtypes(include='number').corr()



In [23]:

    
# Scatter of weight against height, one colour per gender.
fig, ax = plt.subplots(figsize=(10, 5))
# Colours are consumed in groupby order (group keys are sorted by default).
ax.set_prop_cycle('color', ['LIGHTPINK', 'LIGHTSKYBLUE'])

for gender, selection in df.groupby("Gender"):
    # linestyle="" draws markers only, so each group renders as a scatter.
    ax.plot(selection['Height'], selection["Weight"], label=gender,
            marker='o', linestyle="", markeredgewidth=0)

# Label the axes and title the figure so it can be read on its own.
ax.set(xlabel='Height', ylabel='Weight', title='Weight vs. Height by Gender')
ax.legend(loc='upper left')









    Out[23]:





<matplotlib.legend.Legend at 0x10b9f7c18>



In [25]:

    
# Male correlation between Height and Weight.
# Only the two numeric columns are kept before calling .corr(): pandas >= 2.0
# raises on the string 'Gender' column instead of dropping it silently.
# .loc[row, col] replaces the chained ['Height']['Weight'] lookup.
df.loc[df['Gender'] == "Male", ['Height', 'Weight']].corr().loc['Height', 'Weight']









    Out[25]:





0.86297884861631291



In [27]:

    
# Female correlation between Height and Weight.
# Only the two numeric columns are kept before calling .corr(): pandas >= 2.0
# raises on the string 'Gender' column instead of dropping it silently.
# .loc[row, col] replaces the chained ['Height']['Weight'] lookup.
df.loc[df['Gender'] == 'Female', ['Height', 'Weight']].corr().loc['Height', 'Weight']









    Out[27]:





0.8496085914186009



In [ ]:

	Gender	Height	Weight
0	Male	73.847017	241.893563
1	Male	68.781904	162.310473
2	Male	74.110105	212.740856
3	Male	71.730978	220.042470
4	Male	69.881796	206.349801