In [10]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
df = pd.read_csv('data/heights_weights_genders.csv')

In [4]:
print(df.head)


<bound method NDFrame.head of       Gender     Height      Weight
0       Male  73.847017  241.893563
1       Male  68.781904  162.310473
2       Male  74.110105  212.740856
3       Male  71.730978  220.042470
4       Male  69.881796  206.349801
5       Male  67.253016  152.212156
6       Male  68.785081  183.927889
7       Male  68.348516  167.971110
8       Male  67.018950  175.929440
9       Male  63.456494  156.399676
10      Male  71.195382  186.604926
11      Male  71.640805  213.741169
12      Male  64.766329  167.127461
13      Male  69.283070  189.446181
14      Male  69.243732  186.434168
15      Male  67.645620  172.186930
16      Male  72.418317  196.028506
17      Male  63.974326  172.883470
18      Male  69.640060  185.983958
19      Male  67.936005  182.426648
20      Male  67.915050  174.115929
21      Male  69.439440  197.731422
22      Male  66.149132  149.173566
23      Male  75.205974  228.761781
24      Male  67.893196  162.006652
25      Male  68.144033  192.343977
26      Male  69.089631  184.435174
27      Male  72.800844  206.828189
28      Male  67.421242  175.213922
29      Male  68.496415  154.342639
...      ...        ...         ...
9970  Female  65.618737  151.500389
9971  Female  64.640247  155.318297
9972  Female  60.653733  123.084293
9973  Female  60.737031  120.926500
9974  Female  65.393947  143.017835
9975  Female  66.251923  124.019917
9976  Female  61.475904  121.387236
9977  Female  64.494838  149.402547
9978  Female  57.375759  114.192209
9979  Female  62.056012  125.135897
9980  Female  60.472262  110.768229
9981  Female  60.443264  135.559390
9982  Female  69.868511  177.992066
9983  Female  65.830726  132.827889
9984  Female  59.047029  111.707369
9985  Female  68.041065  170.514213
9986  Female  63.352698  141.906510
9987  Female  65.610243  151.169475
9988  Female  59.538729  121.244876
9989  Female  60.955084   95.686674
9990  Female  63.179498  141.266100
9991  Female  62.636675  102.853563
9992  Female  62.077832  138.691680
9993  Female  60.030434   97.687432
9994  Female  59.098250  110.529686
9995  Female  66.172652  136.777454
9996  Female  67.067155  170.867906
9997  Female  63.867992  128.475319
9998  Female  69.034243  163.852461
9999  Female  61.944246  113.649103

[10000 rows x 3 columns]>

In [5]:
df.corr()


Out[5]:
Height Weight
Height 1.000000 0.924756
Weight 0.924756 1.000000

In [6]:
Male = df[df['Gender'] == 'Male']
Female = df[df['Gender'] == 'Female']

In [7]:
Male.corr()


Out[7]:
Height Weight
Height 1.000000 0.862979
Weight 0.862979 1.000000

In [8]:
Female.corr()


Out[8]:
Height Weight
Height 1.000000 0.849609
Weight 0.849609 1.000000

Findings: For males there was a slightly higher positive correlation between height and weight at 86.29% whereas for females the correlation was 84.96%. Although, I am unclear why prior to seperating by gender the correlariton was 92% which is higher than both groups as individuals.


In [19]:
ax= Male.plot(kind='scatter', x='Height', y='Weight', alpha=0.1)
ax.set_title('Male Height Vs. Weight ')
ax.set_xlabel('Height (In.)')
ax.set_ylabel('Weight (Lbs.)')


Out[19]:
<matplotlib.text.Text at 0x112d909b0>

In [20]:
ax = Female.plot(kind='scatter', x='Height', y='Weight', alpha=0.1)
ax.set_title('Female Height Vs. Weight ')
ax.set_xlabel('Height (In.)')
ax.set_ylabel('Weight (Lbs.)')


Out[20]:
<matplotlib.text.Text at 0x112db55c0>

In [ ]:


In [ ]: