Assignment 3

Using the heights_weights_genders.csv, analyze the difference between the height weight correlation in women and men.


In [87]:
import pandas as pd
%matplotlib inline

In [88]:
df = pd.read_csv("heights_weights_genders.csv")

In [89]:
df


Out[89]:
Gender Height Weight
0 Male 73.847017 241.893563
1 Male 68.781904 162.310473
2 Male 74.110105 212.740856
3 Male 71.730978 220.042470
4 Male 69.881796 206.349801
5 Male 67.253016 152.212156
6 Male 68.785081 183.927889
7 Male 68.348516 167.971110
8 Male 67.018950 175.929440
9 Male 63.456494 156.399676
10 Male 71.195382 186.604926
11 Male 71.640805 213.741169
12 Male 64.766329 167.127461
13 Male 69.283070 189.446181
14 Male 69.243732 186.434168
15 Male 67.645620 172.186930
16 Male 72.418317 196.028506
17 Male 63.974326 172.883470
18 Male 69.640060 185.983958
19 Male 67.936005 182.426648
20 Male 67.915050 174.115929
21 Male 69.439440 197.731422
22 Male 66.149132 149.173566
23 Male 75.205974 228.761781
24 Male 67.893196 162.006652
25 Male 68.144033 192.343977
26 Male 69.089631 184.435174
27 Male 72.800844 206.828189
28 Male 67.421242 175.213922
29 Male 68.496415 154.342639
... ... ... ...
9970 Female 65.618737 151.500389
9971 Female 64.640247 155.318297
9972 Female 60.653733 123.084293
9973 Female 60.737031 120.926500
9974 Female 65.393947 143.017835
9975 Female 66.251923 124.019917
9976 Female 61.475904 121.387236
9977 Female 64.494838 149.402547
9978 Female 57.375759 114.192209
9979 Female 62.056012 125.135897
9980 Female 60.472262 110.768229
9981 Female 60.443264 135.559390
9982 Female 69.868511 177.992066
9983 Female 65.830726 132.827889
9984 Female 59.047029 111.707369
9985 Female 68.041065 170.514213
9986 Female 63.352698 141.906510
9987 Female 65.610243 151.169475
9988 Female 59.538729 121.244876
9989 Female 60.955084 95.686674
9990 Female 63.179498 141.266100
9991 Female 62.636675 102.853563
9992 Female 62.077832 138.691680
9993 Female 60.030434 97.687432
9994 Female 59.098250 110.529686
9995 Female 66.172652 136.777454
9996 Female 67.067155 170.867906
9997 Female 63.867992 128.475319
9998 Female 69.034243 163.852461
9999 Female 61.944246 113.649103

10000 rows × 3 columns


In [90]:
df.head()


Out[90]:
Gender Height Weight
0 Male 73.847017 241.893563
1 Male 68.781904 162.310473
2 Male 74.110105 212.740856
3 Male 71.730978 220.042470
4 Male 69.881796 206.349801

In [91]:
male_df = df[(df['Gender'] == 'Male')].describe()
male_df


Out[91]:
Height Weight
count 5000.000000 5000.000000
mean 69.026346 187.020621
std 2.863362 19.781155
min 58.406905 112.902939
25% 67.174679 173.887767
50% 69.027709 187.033546
75% 70.988744 200.357802
max 78.998742 269.989699

In [92]:
male_df.describe()


Out[92]:
Height Weight
count 8.000000 8.000000
mean 677.060811 768.871691
std 1746.893199 1711.184295
min 2.863362 19.781155
25% 64.982736 158.641560
50% 69.027027 187.027083
75% 72.991243 217.765776
max 5000.000000 5000.000000

In [93]:
male_df.quantile(q=0.25)


Out[93]:
Height     64.982736
Weight    158.641560
dtype: float64

In [94]:
male_df.quantile(q=0.5)


Out[94]:
Height     69.027027
Weight    187.027083
dtype: float64

In [95]:
male_df.quantile(q=0.75)


Out[95]:
Height     72.991243
Weight    217.765776
dtype: float64

In [96]:
male_iqr = male_df.quantile(q=0.75) - male_df.quantile(q=0.25)
male_iqr


Out[96]:
Height     8.008508
Weight    59.124216
dtype: float64

In [97]:
male_df.quantile(q=0.75) + (iqr*1.5)


Out[97]:
Height     78.712340
Weight    257.470828
dtype: float64

In [98]:
male_df.quantile(q=0.25) - (iqr*1.5)


Out[98]:
Height     59.261639
Weight    118.936509
dtype: float64

In [99]:
male_df.std()


Out[99]:
Height    1746.893199
Weight    1711.184295
dtype: float64

In [112]:
male_df.plot(kind='scatter', y='Height', x='Weight')


Out[112]:
<matplotlib.axes._subplots.AxesSubplot at 0x11261cba8>

In [100]:
female_df = df[(df['Gender'] == 'Female')]
female_df


Out[100]:
Gender Height Weight
5000 Female 58.910732 102.088326
5001 Female 65.230013 141.305823
5002 Female 63.369004 131.041403
5003 Female 64.479997 128.171511
5004 Female 61.793096 129.781407
5005 Female 65.968019 156.802083
5006 Female 62.850379 114.969038
5007 Female 65.652156 165.083001
5008 Female 61.890234 111.676199
5009 Female 63.677868 104.151560
5010 Female 68.101172 166.575661
5011 Female 61.798879 106.233687
5012 Female 63.371459 128.118169
5013 Female 58.895886 101.682613
5014 Female 58.438249 98.192621
5015 Female 60.809799 126.915463
5016 Female 70.128653 151.254270
5017 Female 62.257430 115.797393
5018 Female 61.735090 107.866872
5019 Female 63.059557 145.589929
5020 Female 62.286838 139.522708
5021 Female 61.827478 122.766167
5022 Female 66.347537 157.380965
5023 Female 65.320632 145.037376
5024 Female 66.103873 148.645183
5025 Female 64.527182 132.680868
5026 Female 56.547975 84.872124
5027 Female 62.739281 138.530421
5028 Female 61.585199 137.425287
5029 Female 62.024425 124.603941
... ... ... ...
9970 Female 65.618737 151.500389
9971 Female 64.640247 155.318297
9972 Female 60.653733 123.084293
9973 Female 60.737031 120.926500
9974 Female 65.393947 143.017835
9975 Female 66.251923 124.019917
9976 Female 61.475904 121.387236
9977 Female 64.494838 149.402547
9978 Female 57.375759 114.192209
9979 Female 62.056012 125.135897
9980 Female 60.472262 110.768229
9981 Female 60.443264 135.559390
9982 Female 69.868511 177.992066
9983 Female 65.830726 132.827889
9984 Female 59.047029 111.707369
9985 Female 68.041065 170.514213
9986 Female 63.352698 141.906510
9987 Female 65.610243 151.169475
9988 Female 59.538729 121.244876
9989 Female 60.955084 95.686674
9990 Female 63.179498 141.266100
9991 Female 62.636675 102.853563
9992 Female 62.077832 138.691680
9993 Female 60.030434 97.687432
9994 Female 59.098250 110.529686
9995 Female 66.172652 136.777454
9996 Female 67.067155 170.867906
9997 Female 63.867992 128.475319
9998 Female 69.034243 163.852461
9999 Female 61.944246 113.649103

5000 rows × 3 columns


In [114]:
female_df.describe()


Out[114]:
Height Weight
count 5000.000000 5000.000000
mean 63.708774 135.860093
std 2.696284 19.022468
min 54.263133 64.700127
25% 61.894441 122.934096
50% 63.730924 136.117583
75% 65.563565 148.810926
max 73.389586 202.237214

In [115]:
female_df.median()


Out[115]:
Height     63.730924
Weight    136.117583
dtype: float64

In [116]:
female_df.quantile(q=0.25)


Out[116]:
Height     61.894441
Weight    122.934096
dtype: float64

In [117]:
female_df.quantile(q=0.5)


Out[117]:
Height     63.730924
Weight    136.117583
dtype: float64

In [118]:
female_df.quantile(q=0.75)


Out[118]:
Height     65.563565
Weight    148.810926
dtype: float64

In [119]:
female_iqr = female_df.quantile(q=0.75) - female_df.quantile(q=0.25)
female_iqr


Out[119]:
Height     3.669124
Weight    25.876830
dtype: float64

In [120]:
female_df.quantile(q=0.75) + (iqr*1.5)


Out[120]:
Height     71.284662
Weight    188.515978
dtype: float64

In [121]:
female_df.quantile(q=0.25) - (iqr*1.5)


Out[121]:
Height    56.173345
Weight    83.229044
dtype: float64

In [122]:
female_df.std()


Out[122]:
Height     2.696284
Weight    19.022468
dtype: float64

In [123]:
female_df.corr()


Out[123]:
Height Weight
Height 1.000000 0.849609
Weight 0.849609 1.000000

In [124]:
female_df.plot(kind='scatter', y='Height', x='Weight')


Out[124]:
<matplotlib.axes._subplots.AxesSubplot at 0x112845f98>

In [ ]: