In [2]:
!pip install xlrd
!pip install matplotlib
In [3]:
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
In [4]:
# Read the height/weight/gender records from the CSV file next to the notebook.
df = pd.read_csv(filepath_or_buffer="heights_weights_genders.csv")
In [5]:
# Peek at the last five rows of the loaded data.
df.tail(n=5)
Out[5]:
In [6]:
# Rows whose Gender value is exactly 'Male'.
df_males = df.loc[df['Gender'].eq('Male')]
In [7]:
# Rows whose Gender value is exactly 'Female'.
df_females = df.loc[df['Gender'].eq('Female')]
In [8]:
# Switch matplotlib to the 'fivethirtyeight' style sheet for the plots in this notebook.
plt.style.use('fivethirtyeight')
In [9]:
# Median of each numeric column for females.
# numeric_only=True skips the string 'Gender' column, which makes median() raise
# a TypeError in pandas >= 2.0.
df_females.median(numeric_only=True)
Out[9]:
In [10]:
# Median of each numeric column for males.
# numeric_only=True skips the string 'Gender' column, which makes median() raise
# a TypeError in pandas >= 2.0.
df_males.median(numeric_only=True)
Out[10]:
In [11]:
# Mean of each numeric column for females.
# numeric_only=True skips the string 'Gender' column, which makes mean() raise
# a TypeError in pandas >= 2.0.
df_females.mean(numeric_only=True)
Out[11]:
In [13]:
# Mean of each numeric column for males.
# numeric_only=True skips the string 'Gender' column, which makes mean() raise
# a TypeError in pandas >= 2.0.
df_males.mean(numeric_only=True)
Out[13]:
In [16]:
# Range of female heights: largest value minus smallest.
df_females['Height'].pipe(lambda col: col.max() - col.min())
Out[16]:
In [17]:
# Range of female weights: largest value minus smallest.
df_females['Weight'].pipe(lambda col: col.max() - col.min())
Out[17]:
In [18]:
# Range of male heights: largest value minus smallest.
df_males['Height'].pipe(lambda col: col.max() - col.min())
Out[18]:
In [19]:
# Range of male weights: largest value minus smallest.
df_males['Weight'].pipe(lambda col: col.max() - col.min())
Out[19]:
In [55]:
# Interquartile range (Q3 - Q1) of each numeric column for females.
# numeric_only=True skips the string 'Gender' column; without it quantile()
# raises a TypeError in pandas >= 2.0, where the default became False.
iqr_f = (
    df_females.quantile(q=0.75, numeric_only=True)
    - df_females.quantile(q=0.25, numeric_only=True)
)
iqr_f
Out[55]:
In [56]:
# Interquartile range (Q3 - Q1) of each numeric column for males.
# numeric_only=True skips the string 'Gender' column; without it quantile()
# raises a TypeError in pandas >= 2.0, where the default became False.
iqr_m = (
    df_males.quantile(q=0.75, numeric_only=True)
    - df_males.quantile(q=0.25, numeric_only=True)
)
iqr_m
Out[56]:
In [64]:
# Upper fence for females, per numeric column: Q3 + 1.5 * IQR.
# numeric_only=True skips the string 'Gender' column, which makes quantile()
# raise a TypeError in pandas >= 2.0.
UAL_f = (iqr_f * 1.5) + df_females.quantile(q=0.75, numeric_only=True)
UAL_f
Out[64]:
In [65]:
# Lower fence for females, per numeric column: Q1 - 1.5 * IQR.
# numeric_only=True skips the string 'Gender' column, which makes quantile()
# raise a TypeError in pandas >= 2.0.
LAL_f = df_females.quantile(q=0.25, numeric_only=True) - (iqr_f * 1.5)
LAL_f
Out[65]:
In [66]:
# Upper fence for males, per numeric column: Q3 + 1.5 * IQR.
# numeric_only=True skips the string 'Gender' column, which makes quantile()
# raise a TypeError in pandas >= 2.0.
UAL_m = (iqr_m * 1.5) + df_males.quantile(q=0.75, numeric_only=True)
UAL_m
Out[66]:
In [67]:
# Lower fence for males, per numeric column: Q1 - 1.5 * IQR.
# numeric_only=True skips the string 'Gender' column, which makes quantile()
# raise a TypeError in pandas >= 2.0.
LAL_m = df_males.quantile(q=0.25, numeric_only=True) - (iqr_m * 1.5)
LAL_m
Out[67]:
In [16]:
# Pairwise correlation between the numeric columns for females.
# The string 'Gender' column is dropped first: corr() on the full frame fails in
# pandas >= 2.0, and select_dtypes works on every pandas version.
df_females.select_dtypes(include='number').corr()
Out[16]:
In [10]:
# Pairwise correlation between the numeric columns for males.
# The string 'Gender' column is dropped first: corr() on the full frame fails in
# pandas >= 2.0, and select_dtypes works on every pandas version.
df_males.select_dtypes(include='number').corr()
Out[10]:
In [50]:
# Number of rows in the male subset.
df_males.shape[0]
Out[50]:
In [34]:
# Number of female heights above the upper fence. The threshold comes from the
# computed UAL_f instead of a rounded number pasted from an earlier output.
len(df_females[df_females['Height'] > UAL_f['Height']])
Out[34]:
In [35]:
# Number of female heights below the lower fence. The threshold comes from the
# computed LAL_f instead of a rounded number pasted from an earlier output.
# Author's note from the original run: in total 25 outliers.
len(df_females[df_females['Height'] < LAL_f['Height']])
Out[35]:
In [69]:
# Number of female weights above the upper fence. The threshold comes from the
# computed UAL_f instead of a rounded number pasted from an earlier output.
len(df_females[df_females['Weight'] > UAL_f['Weight']])
Out[69]:
In [70]:
# Number of female weights below the lower fence. The threshold comes from the
# computed LAL_f instead of a rounded number pasted from an earlier output.
# Author's note from the original run: in total 25 outliers.
len(df_females[df_females['Weight'] < LAL_f['Weight']])
Out[70]:
In [40]:
# Number of male heights above the upper fence. The threshold comes from the
# computed UAL_m instead of a rounded number pasted from an earlier output.
len(df_males[df_males['Height'] > UAL_m['Height']])
Out[40]:
In [44]:
# Number of male heights below the lower fence. The threshold comes from the
# computed LAL_m instead of a rounded number pasted from an earlier output.
# Author's note from the original run: in total 46 outliers.
len(df_males[df_males['Height'] < LAL_m['Height']])
Out[44]:
In [45]:
# Number of male weights above the upper fence. The threshold comes from the
# computed UAL_m instead of a rounded number pasted from an earlier output.
len(df_males[df_males['Weight'] > UAL_m['Weight']])
Out[45]:
In [71]:
# Number of male weights below the lower fence. The threshold comes from the
# computed LAL_m instead of a rounded number pasted from an earlier output.
# Author's note from the original run: in total 46 outliers.
len(df_males[df_males['Weight'] < LAL_m['Weight']])
Out[71]:
In [17]:
# Height-versus-weight scatter plots, one separate figure per gender.
# Each figure gets a title so the two can be told apart when skimming the notebook.
ax = df_males.plot(kind='scatter', x='Weight', y='Height', color='darkblue',
                   figsize=(7, 5), title='Males: height vs. weight')
ax_f = df_females.plot(kind='scatter', x='Weight', y='Height', color='orange',
                       figsize=(7, 5), title='Females: height vs. weight')
# Fixed axis limits apply to the female figure only; the male figure keeps its
# automatically chosen limits.
ax_f.set_ylim([50, 75])
ax_f.set_xlim([60, 220]);  # trailing ';' hides the returned limits tuple
Out[17]:
The correlation coefficient between weight and height is very similar for men and women: about 0.84 for women and 0.86 for men.
In [ ]:
In [ ]: