Assignment 3

Using the heights_weights_genders.csv, analyze the difference between the height weight correlation in women and men.


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
import dateutil.parser
import pg8000
from pandas import DataFrame

In [2]:
df = pd.read_csv("heights_weights_genders.csv")

In [3]:
df.head()


Out[3]:
Gender Height Weight
0 Male 73.847017 241.893563
1 Male 68.781904 162.310473
2 Male 74.110105 212.740856
3 Male 71.730978 220.042470
4 Male 69.881796 206.349801

In [14]:
male = df[df['Gender']=='Male']

In [15]:
male['Height'].median()


Out[15]:
69.02770850939555

In [16]:
male['Weight'].median()


Out[16]:
187.033546088862

In [17]:
male.plot(kind='scatter', x='Height', y='Weight')
plt.xlabel('Heights of Male Population')
plt.ylabel('Weights of Male Population')


Out[17]:
<matplotlib.text.Text at 0x7dc5910>

In [25]:
male.boxplot()


c:\users\radhika\appdata\local\programs\python\python35-32\lib\site-packages\ipykernel\__main__.py:1: FutureWarning: 
The default value for 'return_type' will change to 'axes' in a future release.
 To use the future behavior now, set return_type='axes'.
 To keep the previous behavior and silence this warning, set return_type='dict'.
  if __name__ == '__main__':
Out[25]:
{'boxes': [<matplotlib.lines.Line2D at 0x92aae10>,
  <matplotlib.lines.Line2D at 0x92b8c70>],
 'caps': [<matplotlib.lines.Line2D at 0x92b1d70>,
  <matplotlib.lines.Line2D at 0x92b1e10>,
  <matplotlib.lines.Line2D at 0x92bdeb0>,
  <matplotlib.lines.Line2D at 0x92c3410>],
 'fliers': [<matplotlib.lines.Line2D at 0x92b8bd0>,
  <matplotlib.lines.Line2D at 0x92c3d90>],
 'means': [],
 'medians': [<matplotlib.lines.Line2D at 0x92b82f0>,
  <matplotlib.lines.Line2D at 0x92c34b0>],
 'whiskers': [<matplotlib.lines.Line2D at 0x92aaef0>,
  <matplotlib.lines.Line2D at 0x92b18b0>,
  <matplotlib.lines.Line2D at 0x92bd5d0>,
  <matplotlib.lines.Line2D at 0x92bda90>]}

In [18]:
female = df[df['Gender']=='Female']

In [19]:
female['Height'].median()


Out[19]:
63.7309238591475

In [20]:
female['Weight'].median()


Out[20]:
136.11758297008498

In [21]:
female.plot(kind='scatter', x='Height', y='Weight')
plt.xlabel('Heights of Male Population')
plt.ylabel('Weights of Male Population')


Out[21]:
<matplotlib.text.Text at 0x7e05110>

In [9]:
male.corr(method='pearson', min_periods=1)


Out[9]:
Height Weight
Height 1.000000 0.849609
Weight 0.849609 1.000000

In [24]:
female.boxplot()


c:\users\radhika\appdata\local\programs\python\python35-32\lib\site-packages\ipykernel\__main__.py:1: FutureWarning: 
The default value for 'return_type' will change to 'axes' in a future release.
 To use the future behavior now, set return_type='axes'.
 To keep the previous behavior and silence this warning, set return_type='dict'.
  if __name__ == '__main__':
Out[24]:
{'boxes': [<matplotlib.lines.Line2D at 0x925ccf0>,
  <matplotlib.lines.Line2D at 0x9267b50>],
 'caps': [<matplotlib.lines.Line2D at 0x9262c50>,
  <matplotlib.lines.Line2D at 0x9262cf0>,
  <matplotlib.lines.Line2D at 0x926edb0>,
  <matplotlib.lines.Line2D at 0x926ee50>],
 'fliers': [<matplotlib.lines.Line2D at 0x9267ab0>,
  <matplotlib.lines.Line2D at 0x9272c10>],
 'means': [],
 'medians': [<matplotlib.lines.Line2D at 0x92671d0>,
  <matplotlib.lines.Line2D at 0x9272330>],
 'whiskers': [<matplotlib.lines.Line2D at 0x925cdd0>,
  <matplotlib.lines.Line2D at 0x9262790>,
  <matplotlib.lines.Line2D at 0x9267ff0>,
  <matplotlib.lines.Line2D at 0x926e8f0>]}

In [ ]: