In [1]:
import pandas as pd
%matplotlib inline
In [2]:
# Load the first worksheet of the height/weight workbook into a DataFrame.
df = pd.read_excel("height_weight.xlsx", sheet_name=0)
In [3]:
# Preview only the first few rows instead of dumping the whole frame;
# the statistics in the cells below are still computed on the full data.
df.head()
Out[3]:
In [4]:
# Draw one histogram per numeric column with the default number of bins (10).
# The trailing semicolon hides the array-of-Axes repr so only the figure is shown.
df.hist();
Out[4]:
In [5]:
# Redraw the histograms with 5 bins (`bins` is the number of bins, not their width).
# The trailing semicolon hides the array-of-Axes repr so only the figure is shown.
df.hist(bins=5);
Out[5]:
In [6]:
# Mean of each numeric column.
# numeric_only=True skips non-numeric (e.g. text) columns; without it,
# pandas >= 2.0 raises TypeError as soon as the sheet contains one.
df.mean(numeric_only=True)
Out[6]:
In [7]:
# Median of each numeric column.
# numeric_only=True skips non-numeric (e.g. text) columns; without it,
# pandas >= 2.0 raises TypeError as soon as the sheet contains one.
df.median(numeric_only=True)
Out[7]:
In [8]:
# Most frequent value(s) of each column. The result is a DataFrame: a column
# with several tied modes gets one row per mode, and shorter columns are
# padded with NaN.
df.mode(axis=0)
Out[8]:
In [11]:
# Range of the height column: largest value minus smallest value.
df['height'].pipe(lambda col: col.max() - col.min())
Out[11]:
In [12]:
# Range of the weight column: largest value minus smallest value.
df['weight'].pipe(lambda col: col.max() - col.min())
Out[12]:
In [13]:
# 1st quartile (25th percentile) of each numeric column.
# numeric_only=True pins the pre-2.0 pandas default, so a non-numeric column
# in the sheet is skipped instead of raising TypeError.
df.quantile(q=0.25, numeric_only=True)
Out[13]:
In [14]:
# 2nd quartile (50th percentile, i.e. the median) of each numeric column.
# numeric_only=True pins the pre-2.0 pandas default, so a non-numeric column
# in the sheet is skipped instead of raising TypeError.
df.quantile(q=0.5, numeric_only=True)
Out[14]:
In [15]:
# 3rd quartile (75th percentile) of each numeric column.
# numeric_only=True pins the pre-2.0 pandas default, so a non-numeric column
# in the sheet is skipped instead of raising TypeError.
df.quantile(q=0.75, numeric_only=True)
Out[15]:
In [16]:
# Interquartile range of the height column: 3rd quartile minus 1st quartile.
df['height'].pipe(lambda col: col.quantile(q=0.75) - col.quantile(q=0.25))
Out[16]:
In [17]:
# Interquartile range of the weight column: 3rd quartile minus 1st quartile.
df['weight'].pipe(lambda col: col.quantile(q=0.75) - col.quantile(q=0.25))
Out[17]:
In [10]:
# Sample standard deviation (pandas default ddof=1) of each numeric column.
# numeric_only=True skips non-numeric (e.g. text) columns; without it,
# pandas >= 2.0 raises TypeError as soon as the sheet contains one.
df.std(numeric_only=True)
Out[10]:
In [18]:
# Shortcut: describe() reports count, mean, std, min, the three quartiles and
# max for each numeric column in a single table (mode, range and IQR are not
# included and still need the cells above).
df.describe(percentiles=[0.25, 0.5, 0.75])
Out[18]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: