In [1]:
import pandas as pd
%matplotlib inline

In [2]:
# Read the data-collection sheet into a DataFrame; the path is relative to the
# notebook's working directory.
survey_csv = "data/Data_Collection_Sheet.csv"
df = pd.read_csv(survey_csv)

In [4]:
# Histogram of the height column in 5 bins. A title and axis labels are set so
# the figure can be read on its own; the Axes is kept as the last expression so
# the cell still displays it as its output.
ax = df["height (inches)"].hist(bins=5)
ax.set(title="Distribution of heights", xlabel="height (inches)", ylabel="count")
ax


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f64bdc797f0>

In [7]:
# Show the column labels so the exact names (units and notes in parentheses
# included) can be copied into the cells that index by column.
df.columns


Out[7]:
Index(['name', 'height (inches)', 'age (years)',
       'siblings (not including you)'],
      dtype='object')

In [8]:
# Histogram of the sibling counts (default binning). A title and axis labels are
# set so the figure can be read on its own; the Axes is kept as the last
# expression so the cell still displays it as its output.
ax = df['siblings (not including you)'].hist()
ax.set(
    title="Distribution of sibling counts",
    xlabel="siblings (not including you)",
    ylabel="count",
)
ax


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f64bb34f0f0>

In [9]:
# Average number of siblings. Missing answers are skipped, which is pandas'
# default and is only spelled out here for the reader.
sibling_counts = df['siblings (not including you)']
sibling_counts.mean(skipna=True)


Out[9]:
1.4761904761904763

In [11]:
# Rows whose height is strictly greater than 75 inches (6 ft 3 in).
is_tall = df['height (inches)'].gt(75)
df.loc[is_tall]


Out[11]:
name height (inches) age (years) siblings (not including you)
18 JJ Gallagher 77.0 36 2.0

In [13]:
# Rows whose height is strictly less than 59 inches (4 ft 11 in).
is_short = df['height (inches)'].lt(59)
df.loc[is_short]


Out[13]:
name height (inches) age (years) siblings (not including you)
0 Radhika pc 55.0 35 1.0
17 Jing Yi Hon 58.0 28 1.0

In [14]:
# Load the height/weight workbook into a DataFrame.
# NOTE(review): this rebinds `df`, which until now held the data-collection
# sheet. The earlier cells index columns such as 'height (inches)', while the
# cells after this one use 'height' and 'weight', so re-running an earlier cell
# after this one will presumably fail. Consider a distinct variable name.
df = pd.read_excel("data/height_weight.xlsx")

In [18]:
# Scatter plot of weight (vertical axis) against height (horizontal axis).
df.plot.scatter(x='height', y='weight')


Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f64bb607748>

In [19]:
# Pairwise Pearson correlation between height and weight.
# The two columns are selected explicitly so the result does not depend on how
# the installed pandas version treats non-numeric columns in DataFrame.corr().
df[['height', 'weight']].corr()


Out[19]:
height weight
height 1.000000 0.877785
weight 0.877785 1.000000

A simple representation of recursion

Example of finding outliers in our height dataset with the 1.5 × IQR rule (values below Q1 − 1.5 × IQR or above Q3 + 1.5 × IQR)

Calculating the standard deviations from the mean for our height data


In [ ]: