notebook.community

Edit and run



In [1]:

    
import pandas as pd
%matplotlib inline



In [2]:

    
# Read the height/weight workbook into a DataFrame.
# The path is relative, so the file must sit in the notebook's working directory.
df = pd.read_excel("height_weight.xlsx")



In [3]:

    
# Display the whole table; it is small enough to inspect directly.
df



In [4]:

    
# Draw one histogram per numeric column, using the default binning.
df.hist()









    Out[4]:





array([[<matplotlib.axes.AxesSubplot object at 0x10739aed0>,
        <matplotlib.axes.AxesSubplot object at 0x107465410>]], dtype=object)



In [5]:

    
# Redraw the histograms with 5 bins per column.
# Note: `bins` sets the number of bins, not the width of each bin.
df.hist(bins=5)









    Out[5]:





array([[<matplotlib.axes.AxesSubplot object at 0x107418290>,
        <matplotlib.axes.AxesSubplot object at 0x1077fd810>]], dtype=object)



In [6]:

    
# Arithmetic mean of each numeric column.
# numeric_only=True skips the text `name` column; without it pandas >= 2.0
# raises a TypeError instead of silently dropping non-numeric columns.
df.mean(numeric_only=True)









    Out[6]:





height     62.336842
weight    100.026316
dtype: float64



In [7]:

    
# Median (middle value) of each numeric column.
# numeric_only=True skips the text `name` column; without it pandas >= 2.0
# raises a TypeError instead of silently dropping non-numeric columns.
df.median(numeric_only=True)









    Out[7]:





height    62.8
weight    99.5
dtype: float64



In [8]:

    
# Most frequent value(s) in each column.
# mode() returns a DataFrame rather than a single row because a column can
# have several tied modes; columns with fewer modes are padded with NaN.
df.mode()



In [11]:

    
# Range of height: largest recorded value minus the smallest.
heights = df['height']
heights.max() - heights.min()









    Out[11]:





20.700000000000003



In [12]:

    
# Range of weight: largest recorded value minus the smallest.
weights = df['weight']
weights.max() - weights.min()









    Out[12]:





99.5



In [13]:

    
# 1st quartile (25th percentile) of each numeric column.
# numeric_only=True is required on pandas >= 2.0, where the default became
# False and the text `name` column makes the call fail.
df.quantile(q=0.25, numeric_only=True)









    Out[13]:





height    58.25
weight    84.25
dtype: float64



In [14]:

    
# 2nd quartile (50th percentile, i.e. the median) of each numeric column.
# numeric_only=True is required on pandas >= 2.0, where the default became
# False and the text `name` column makes the call fail.
df.quantile(q=0.5, numeric_only=True)









    Out[14]:





height    62.8
weight    99.5
dtype: float64



In [15]:

    
# 3rd quartile (75th percentile) of each numeric column.
# numeric_only=True is required on pandas >= 2.0, where the default became
# False and the text `name` column makes the call fail.
df.quantile(q=0.75, numeric_only=True)









    Out[15]:





height     65.90
weight    112.25
dtype: float64



In [16]:

    
# Interquartile range of height: 75th percentile minus 25th percentile.
height_q1 = df['height'].quantile(q=0.25)
height_q3 = df['height'].quantile(q=0.75)
height_q3 - height_q1









    Out[16]:





7.6500000000000057



In [17]:

    
# Interquartile range of weight: 75th percentile minus 25th percentile.
weight_q1 = df['weight'].quantile(q=0.25)
weight_q3 = df['weight'].quantile(q=0.75)
weight_q3 - weight_q1









    Out[17]:





28.0



In [10]:

    
# Standard deviation of each numeric column (pandas' default for std()).
# numeric_only=True skips the text `name` column; without it pandas >= 2.0
# raises a TypeError instead of silently dropping non-numeric columns.
df.std(numeric_only=True)









    Out[10]:





height     5.127075
weight    22.773933
dtype: float64



In [18]:

    
# Shortcut: describe() reports count, mean, std, min, the three quartiles and
# max for every numeric column in a single call.
df.describe()



In [ ]:



In [ ]:



In [ ]:



In [ ]:

	name	height	weight
0	Joyce	51.3	50.5
1	Louise	56.3	77.0
2	Alice	56.5	84.0
3	James	57.3	83.0
4	Thomas	57.5	85.0
5	John	59.0	99.5
6	Jane	59.8	84.5
7	Jeffrey	62.5	84.0
8	Janet	62.5	112.5
9	Carol	62.8	102.5
10	Henry	63.5	102.5
11	Judy	64.3	90.0
12	Robert	64.8	128.0
13	Barbara	65.3	98.0
14	Mary	66.5	112.0
15	William	66.5	112.0
16	Ronald	67.0	133.0
17	Alfred	69.0	112.5
18	Philip	72.0	150.0

	name	height	weight
0	NaN	62.5	84.0
1	NaN	66.5	102.5
2	NaN	NaN	112.0
3	NaN	NaN	112.5

	height	weight
count	19.000000	19.000000
mean	62.336842	100.026316
std	5.127075	22.773933
min	51.300000	50.500000
25%	58.250000	84.250000
50%	62.800000	99.500000
75%	65.900000	112.250000
max	72.000000	150.000000