In [1]:
import pandas as pd
%matplotlib inline

In [2]:
# Read the height/weight spreadsheet into a DataFrame.
# The path is relative, so the workbook must sit in the notebook's working directory.
df = pd.read_excel("height_weight.xlsx")

In [3]:
# Show the whole table: it is small (19 rows; columns name, height, weight),
# so displaying it in full is fine here.
df


Out[3]:
name height weight
0 Joyce 51.3 50.5
1 Louise 56.3 77.0
2 Alice 56.5 84.0
3 James 57.3 83.0
4 Thomas 57.5 85.0
5 John 59.0 99.5
6 Jane 59.8 84.5
7 Jeffrey 62.5 84.0
8 Janet 62.5 112.5
9 Carol 62.8 102.5
10 Henry 63.5 102.5
11 Judy 64.3 90.0
12 Robert 64.8 128.0
13 Barbara 65.3 98.0
14 Mary 66.5 112.0
15 William 66.5 112.0
16 Ronald 67.0 133.0
17 Alfred 69.0 112.5
18 Philip 72.0 150.0

In [4]:
# Draw one histogram per numeric column (height and weight) using the default binning.
# The call returns the array of matplotlib Axes, which is what appears in the cell's Out.
df.hist()


Out[4]:
array([[<matplotlib.axes.AxesSubplot object at 0x10739aed0>,
        <matplotlib.axes.AxesSubplot object at 0x107465410>]], dtype=object)

In [5]:
# Redraw the histograms with 5 bins per column.
# Note: `bins=5` sets the NUMBER of bins, not the bin width.
df.hist(bins=5)


Out[5]:
array([[<matplotlib.axes.AxesSubplot object at 0x107418290>,
        <matplotlib.axes.AxesSubplot object at 0x1077fd810>]], dtype=object)

In [6]:
# Calculate the mean of each numeric column (height, weight).
# numeric_only=True explicitly skips the text column `name`. Older pandas dropped
# it silently, but pandas >= 2.0 raises a TypeError when asked to average strings.
df.mean(numeric_only=True)


Out[6]:
height     62.336842
weight    100.026316
dtype: float64

In [7]:
# Calculate the median of each numeric column (height, weight).
# numeric_only=True explicitly skips the text column `name`. Older pandas dropped
# it silently, but pandas >= 2.0 raises a TypeError on non-numeric columns.
df.median(numeric_only=True)


Out[7]:
height    62.8
weight    99.5
dtype: float64

In [8]:
# Calculate the mode (most frequent value) of every column.
# A column can have several modes (ties), so the result is a DataFrame with one
# row per tied value; columns with fewer modes are padded with NaN.
# Here height has two modes and weight has four, hence the four-row result.
df.mode()


Out[8]:
name height weight
0 NaN 62.5 84.0
1 NaN 66.5 102.5
2 NaN NaN 112.0
3 NaN NaN 112.5

In [11]:
# Calculate the range of height: largest value minus smallest value.
# The trailing digits in the result are ordinary floating-point rounding noise.
df['height'].max() - df['height'].min()


Out[11]:
20.700000000000003

In [12]:
# Calculate the range of weight: largest value minus smallest value.
df['weight'].max() - df['weight'].min()


Out[12]:
99.5

In [13]:
# Calculate the 1st quartile (25th percentile) of each numeric column.
# numeric_only=True restricts the computation to height and weight. On
# pandas >= 2.0 the default is False and the text column `name` raises a TypeError.
df.quantile(q=0.25, numeric_only=True)


Out[13]:
height    58.25
weight    84.25
dtype: float64

In [14]:
# Calculate the 2nd quartile (50th percentile, i.e. the median) of each numeric column.
# numeric_only=True restricts the computation to height and weight. On
# pandas >= 2.0 the default is False and the text column `name` raises a TypeError.
df.quantile(q=0.5, numeric_only=True)


Out[14]:
height    62.8
weight    99.5
dtype: float64

In [15]:
# Calculate the 3rd quartile (75th percentile) of each numeric column.
# numeric_only=True restricts the computation to height and weight. On
# pandas >= 2.0 the default is False and the text column `name` raises a TypeError.
df.quantile(q=0.75, numeric_only=True)


Out[15]:
height     65.90
weight    112.25
dtype: float64

In [16]:
# Calculate the interquartile range (IQR) of height:
# 3rd quartile (q=0.75) minus 1st quartile (q=0.25), i.e. the spread of the middle 50%.
# The trailing digits in the result are ordinary floating-point rounding noise.
df['height'].quantile(q=0.75) - df['height'].quantile(q=0.25)


Out[16]:
7.6500000000000057

In [17]:
# Calculate the interquartile range (IQR) of weight:
# 3rd quartile (q=0.75) minus 1st quartile (q=0.25), i.e. the spread of the middle 50%.
df['weight'].quantile(q=0.75) - df['weight'].quantile(q=0.25)


Out[17]:
28.0

In [10]:
# Calculate the standard deviation of each numeric column (height, weight).
# numeric_only=True explicitly skips the text column `name`. Older pandas dropped
# it silently, but pandas >= 2.0 raises a TypeError on non-numeric columns.
df.std(numeric_only=True)


Out[10]:
height     5.127075
weight    22.773933
dtype: float64

In [18]:
# Shortcut: describe() reports count, mean, std, min, the 25%/50%/75% quartiles
# and max for the numeric columns in a single call, matching the values
# computed one at a time in the cells above.
df.describe()


Out[18]:
height weight
count 19.000000 19.000000
mean 62.336842 100.026316
std 5.127075 22.773933
min 51.300000 50.500000
25% 58.250000 84.250000
50% 62.800000 99.500000
75% 65.900000 112.250000
max 72.000000 150.000000

In [ ]:


In [ ]:


In [ ]:


In [ ]: