In [2]:
    
!pip install xlrd
!pip install matplotlib
    
    
In [3]:
    
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
    
    
In [4]:
    
# Load the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="heights_weights_genders.csv")
    
In [5]:
    
# Peek at the last five rows of the loaded frame.
df.tail(n=5)
    
    Out[5]:
In [6]:
    
# Subset of rows whose 'Gender' column equals 'Male'.
df_males = df[df['Gender'].eq('Male')]
    
In [7]:
    
# Subset of rows whose 'Gender' column equals 'Female'.
df_females = df[df['Gender'].eq('Female')]
    
In [8]:
    
# Apply the 'fivethirtyeight' matplotlib style sheet to all subsequent figures.
plt.style.use(style='fivethirtyeight')
    
In [9]:
    
# Median of each numeric column for the female subset.
# numeric_only=True excludes the string 'Gender' column; pandas >= 2.0 no longer
# drops non-numeric columns silently and raises a TypeError without it.
df_females.median(numeric_only=True)
    
    Out[9]:
In [10]:
    
# Median of each numeric column for the male subset.
# numeric_only=True excludes the string 'Gender' column; pandas >= 2.0 no longer
# drops non-numeric columns silently and raises a TypeError without it.
df_males.median(numeric_only=True)
    
    Out[10]:
In [11]:
    
# Mean of each numeric column for the female subset.
# numeric_only=True excludes the string 'Gender' column; pandas >= 2.0 no longer
# drops non-numeric columns silently and raises a TypeError without it.
df_females.mean(numeric_only=True)
    
    Out[11]:
In [13]:
    
# Mean of each numeric column for the male subset.
# numeric_only=True excludes the string 'Gender' column; pandas >= 2.0 no longer
# drops non-numeric columns silently and raises a TypeError without it.
df_males.mean(numeric_only=True)
    
    Out[13]:
In [16]:
    
# Range (max - min) of the 'Height' column for the female subset.
female_heights = df_females['Height']
female_heights.max() - female_heights.min()
    
    Out[16]:
In [17]:
    
# Range (max - min) of the 'Weight' column for the female subset.
female_weights = df_females['Weight']
female_weights.max() - female_weights.min()
    
    Out[17]:
In [18]:
    
# Range (max - min) of the 'Height' column for the male subset.
male_heights = df_males['Height']
male_heights.max() - male_heights.min()
    
    Out[18]:
In [19]:
    
# Range (max - min) of the 'Weight' column for the male subset.
male_weights = df_males['Weight']
male_weights.max() - male_weights.min()
    
    Out[19]:
In [55]:
    
# Interquartile range (Q3 - Q1) of each numeric column for the female subset.
# numeric_only=True keeps the string 'Gender' column out of the computation;
# on pandas >= 2.0 quantile() raises a TypeError on it otherwise.
iqr_f = (
    df_females.quantile(q=0.75, numeric_only=True)
    - df_females.quantile(q=0.25, numeric_only=True)
)
iqr_f
    
    Out[55]:
In [56]:
    
# Interquartile range (Q3 - Q1) of each numeric column for the male subset.
# numeric_only=True keeps the string 'Gender' column out of the computation;
# on pandas >= 2.0 quantile() raises a TypeError on it otherwise.
iqr_m = (
    df_males.quantile(q=0.75, numeric_only=True)
    - df_males.quantile(q=0.25, numeric_only=True)
)
iqr_m
    
    Out[56]:
In [64]:
    
# Upper outlier threshold for females, per numeric column: Q3 + 1.5 * IQR.
# numeric_only=True skips the string 'Gender' column, which makes quantile()
# raise a TypeError on pandas >= 2.0.
UAL_f = df_females.quantile(q=0.75, numeric_only=True) + (iqr_f * 1.5)
UAL_f
    
    Out[64]:
In [65]:
    
# Lower outlier threshold for females, per numeric column: Q1 - 1.5 * IQR.
# numeric_only=True skips the string 'Gender' column, which makes quantile()
# raise a TypeError on pandas >= 2.0.
LAL_f = df_females.quantile(q=0.25, numeric_only=True) - (iqr_f * 1.5)
LAL_f
    
    Out[65]:
In [66]:
    
# Upper outlier threshold for males, per numeric column: Q3 + 1.5 * IQR.
# numeric_only=True skips the string 'Gender' column, which makes quantile()
# raise a TypeError on pandas >= 2.0.
UAL_m = df_males.quantile(q=0.75, numeric_only=True) + (iqr_m * 1.5)
UAL_m
    
    Out[66]:
In [67]:
    
# Lower outlier threshold for males, per numeric column: Q1 - 1.5 * IQR.
# numeric_only=True skips the string 'Gender' column, which makes quantile()
# raise a TypeError on pandas >= 2.0.
LAL_m = df_males.quantile(q=0.25, numeric_only=True) - (iqr_m * 1.5)
LAL_m
    
    Out[67]:
In [16]:
    
# Pairwise correlation between the numeric columns of the female subset.
# The string 'Gender' column is dropped first: corr() raises on non-numeric
# columns in pandas >= 2.0, and select_dtypes works on older versions too
# (unlike corr(numeric_only=True), which only exists from pandas 1.5).
df_females.select_dtypes(include='number').corr()
    
    Out[16]:
In [10]:
    
# Pairwise correlation between the numeric columns of the male subset.
# The string 'Gender' column is dropped first: corr() raises on non-numeric
# columns in pandas >= 2.0, and select_dtypes works on older versions too
# (unlike corr(numeric_only=True), which only exists from pandas 1.5).
df_males.select_dtypes(include='number').corr()
    
    Out[10]:
In [50]:
    
# Number of rows in the male subset.
df_males.shape[0]
    
    Out[50]:
In [34]:
    
# Number of females whose height exceeds the upper threshold UAL_f['Height'].
# The threshold is read from UAL_f instead of a hand-copied literal
# (previously 71.284662, presumably a rounded copy of that value - confirm),
# so the count stays correct if the data changes.
len(df_females[df_females['Height'] > UAL_f['Height']])
    
    Out[34]:
In [35]:
    
# Number of females whose height falls below the lower threshold LAL_f['Height'].
# The threshold is read from LAL_f instead of a hand-copied literal
# (previously 56.173345, presumably a rounded copy of that value - confirm).
# Author's note from the original run: "In total 25 outliers" - presumably the
# upper and lower height counts combined; verify after re-running.
len(df_females[df_females['Height'] < LAL_f['Height']])
    
    Out[35]:
In [69]:
    
# Number of females whose weight exceeds the upper threshold UAL_f['Weight'].
# The threshold is read from UAL_f instead of a hand-copied literal
# (previously 188.515978, presumably a rounded copy of that value - confirm),
# so the count stays correct if the data changes.
len(df_females[df_females['Weight'] > UAL_f['Weight']])
    
    Out[69]:
In [70]:
    
# Number of females whose weight falls below the lower threshold LAL_f['Weight'].
# The threshold is read from LAL_f instead of a hand-copied literal
# (previously 83.229044, presumably a rounded copy of that value - confirm).
# Author's note from the original run: "In total 25 outliers" - presumably the
# upper and lower weight counts combined; verify after re-running.
len(df_females[df_females['Weight'] < LAL_f['Weight']])
    
    Out[70]:
In [40]:
    
# Number of males whose height exceeds the upper threshold UAL_m['Height'].
# The threshold is read from UAL_m instead of a hand-copied literal
# (previously 76.709840, presumably a rounded copy of that value - confirm),
# so the count stays correct if the data changes.
len(df_males[df_males['Height'] > UAL_m['Height']])
    
    Out[40]:
In [44]:
    
# Number of males whose height falls below the lower threshold LAL_m['Height'].
# The threshold is read from LAL_m instead of a hand-copied literal
# (previously 61.453582, presumably a rounded copy of that value - confirm).
# Author's note from the original run: "In total 46 outliers" - presumably the
# upper and lower height counts combined; verify after re-running.
len(df_males[df_males['Height'] < LAL_m['Height']])
    
    Out[44]:
In [45]:
    
# Number of males whose weight exceeds the upper threshold UAL_m['Weight'].
# The threshold is read from UAL_m instead of a hand-copied literal
# (previously 240.062854, presumably a rounded copy of that value - confirm),
# so the count stays correct if the data changes.
len(df_males[df_males['Weight'] > UAL_m['Weight']])
    
    Out[45]:
In [71]:
    
# Number of males whose weight falls below the lower threshold LAL_m['Weight'].
# The threshold is read from LAL_m instead of a hand-copied literal
# (previously 134.182716, presumably a rounded copy of that value - confirm).
# Author's note from the original run: "In total 46 outliers" - presumably the
# upper and lower weight counts combined; verify after re-running.
len(df_males[df_males['Weight'] < LAL_m['Weight']])
    
    Out[71]:
In [17]:
    
# Height-vs-weight scatter plots, one figure per gender. Each figure gets a
# title so it can be identified without reading the code.
ax = df_males.plot(kind='scatter', x='Weight', y='Height',
                   color='darkblue', figsize=(7, 5))
ax.set_title('Males: height vs. weight')

ax_f = df_females.plot(kind='scatter', x='Weight', y='Height',
                       color='orange', figsize=(7, 5))
ax_f.set_title('Females: height vs. weight')
# Axis limits are applied to the female figure only; the male figure keeps
# matplotlib's automatic limits.
ax_f.set_ylim([50, 75])
ax_f.set_xlim([60, 220]);  # trailing ';' hides the (60, 220) repr in the cell output
    
    Out[17]:
    
    
The correlation coefficient between weight and height is very similar for men and women: about 0.84 for women and 0.86 for men.
In [ ]:
    
    
In [ ]: