notebook.community

Edit and run



In [1]:

    
import pandas as pd
%matplotlib inline



In [2]:

    
# Load the data-collection sheet (CSV at a path relative to the working directory) into `df`.
df = pd.read_csv("data/Data_Collection_Sheet.csv")



In [4]:

    
# Histogram of the "height (inches)" column, grouped into 5 bins.
# The axes are labelled explicitly so the figure can be read on its own
# when the notebook is skimmed.
ax = df["height (inches)"].hist(bins=5)
ax.set(xlabel="height (inches)", ylabel="count", title="Distribution of height")
# Keep the Axes as the last expression so the cell's displayed output is unchanged.
ax









    Out[4]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f64bdc797f0>



In [7]:

    
# Show the column labels of `df`; later cells select columns by these exact strings.
df.columns









    Out[7]:





Index(['name', 'height (inches)', 'age (years)',
       'siblings (not including you)'],
      dtype='object')



In [8]:

    
# Histogram of the "siblings (not including you)" column.
# The axes are labelled explicitly so the figure can be read on its own
# when the notebook is skimmed.
ax = df['siblings (not including you)'].hist()
ax.set(
    xlabel='siblings (not including you)',
    ylabel='count',
    title='Distribution of number of siblings',
)
# Keep the Axes as the last expression so the cell's displayed output is unchanged.
ax









    Out[8]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f64bb34f0f0>



In [9]:

    
# Arithmetic mean of the "siblings (not including you)" column.
df.loc[:, 'siblings (not including you)'].mean()









    Out[9]:





1.4761904761904763



In [11]:

    
# Rows whose "height (inches)" value is strictly greater than 75.
df.loc[df['height (inches)'].gt(75)]









    Out[11]:






  
    
      
      name
      height (inches)
      age (years)
      siblings (not including you)
    
  
  
    
      18
      JJ Gallagher
      77.0
      36
      2.0



In [13]:

    
# Rows whose "height (inches)" value is strictly less than 59.
df.loc[df['height (inches)'].lt(59)]









    Out[13]:






  
    
      
      name
      height (inches)
      age (years)
      siblings (not including you)
    
  
  
    
      0
      Radhika pc
      55.0
      35
      1.0
    
    
      17
      Jing Yi Hon
      58.0
      28
      1.0



In [14]:

    
# Load the height/weight spreadsheet (path relative to the working directory).
# NOTE(review): this rebinds `df`, replacing the data-collection sheet loaded earlier.
# Cells above that select "height (inches)" or "siblings (not including you)" are
# presumably not valid against this frame if re-run after this point — the cells
# below use 'height' and 'weight' instead; confirm the spreadsheet's columns.
df = pd.read_excel("data/height_weight.xlsx")



In [18]:

    
# Scatter plot with 'height' on the horizontal axis and 'weight' on the vertical axis.
df.plot.scatter(x='height', y='weight')









    Out[18]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f64bb607748>



In [19]:

    
# Pairwise correlation between the columns of `df` (at this point, the height/weight data).
df.corr()

A simple representation of recursion


Example of finding outliers using the 1.5 * IQR rule for our height dataset

Calculating the standard deviations from the mean for our height data



In [ ]: