notebook.community

Edit and run



In [1]:

    
%matplotlib inline
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas as pd



In [2]:

    
births_url = "https://goo.gl/pFAL23"

births = pd.read_table(births_url)



In [4]:

    
births.head(n=10)









    Out[4]:






  
    
      
      fAge
      mAge
      weeks
      premature
      visits
      gained
      weight
      sexBaby
      smoke
    
  
  
    
      0
      31
      30
      39
      full term
      13
      1
      6.88
      male
      smoker
    
    
      1
      34
      36
      39
      full term
      5
      35
      7.69
      male
      nonsmoker
    
    
      2
      36
      35
      40
      full term
      12
      29
      8.88
      male
      nonsmoker
    
    
      3
      41
      40
      40
      full term
      13
      30
      9.00
      female
      nonsmoker
    
    
      4
      42
      37
      40
      full term
      NaN
      10
      7.94
      male
      nonsmoker
    
    
      5
      37
      28
      40
      full term
      12
      35
      8.25
      male
      smoker
    
    
      6
      35
      35
      28
      premie
      6
      29
      1.63
      female
      nonsmoker
    
    
      7
      28
      21
      35
      premie
      9
      15
      5.50
      female
      smoker
    
    
      8
      22
      20
      32
      premie
      5
      40
      2.69
      male
      smoker
    
    
      9
      36
      25
      40
      full term
      13
      34
      8.75
      female
      nonsmoker



In [6]:

    
births.visits.isnull().sum()









    Out[6]:





1



In [7]:

    
births.isnull().sum()









    Out[7]:





fAge         31
mAge          0
weeks         0
premature     0
visits        1
gained        2
weight        0
sexBaby       0
smoke         0
dtype: int64



In [10]:

    
births.smoke.unique()









    Out[10]:





array(['smoker', 'nonsmoker'], dtype=object)



In [11]:

    
nfullterm = births.premature == "full term"
nfullterm.sum()









    Out[11]:





129



In [12]:

    
npremie = births.premature == "premie"
npremie.sum()









    Out[12]:





21



In [13]:

    
births[npremie]



In [20]:

    
#babyGirls = births.query('sexBaby == "female"')
#babyGirls = births[births.sexBaby == "female"]
isgirl = births.sexBaby == "female"
births[isgirl].shape









    Out[20]:





(68, 9)



In [16]:

    
babyGirls.shape









    Out[16]:





(68, 9)



In [21]:

    
isboy = births.sexBaby == "male"
births[isboy].shape









    Out[21]:





(82, 9)



In [22]:

    
ispremie = births.premature == "premie"



In [23]:

    
premieGirls = births[isgirl & ispremie]



In [24]:

    
premieGirls









    Out[24]:






  
    
      
      fAge
      mAge
      weeks
      premature
      visits
      gained
      weight
      sexBaby
      smoke
    
  
  
    
      6
      35
      35
      28
      premie
      6
      29
      1.63
      female
      nonsmoker
    
    
      7
      28
      21
      35
      premie
      9
      15
      5.50
      female
      smoker
    
    
      10
      27
      19
      32
      premie
      5
      32
      6.50
      female
      nonsmoker
    
    
      16
      33
      40
      36
      premie
      13
      23
      7.81
      female
      nonsmoker
    
    
      21
      NaN
      38
      32
      premie
      10
      16
      2.19
      female
      smoker
    
    
      91
      NaN
      41
      33
      premie
      13
      0
      5.69
      female
      nonsmoker
    
    
      130
      17
      17
      29
      premie
      4
      10
      2.63
      female
      nonsmoker
    
    
      140
      NaN
      23
      36
      premie
      2
      27
      5.88
      female
      nonsmoker



In [26]:

    
isbig = births.weight > 9
isbig.sum()









    Out[26]:





7



In [32]:

    
boys = births.sexBaby != "female"



In [30]:

    
births[nofAge]









    



  File "<ipython-input-30-3871e93f3198>", line 1
    births[!nofAge]
           ^
SyntaxError: invalid syntax



In [39]:

    
births.groupby(["smoke","sexBaby"])









    Out[39]:





<pandas.core.groupby.DataFrameGroupBy object at 0x7f056cdadeb8>



In [42]:

    
g = births.groupby('smoke').describe()
g



In [43]:

    
## Matplotlib



In [46]:

    
plt.scatter(births.mAge, births.fAge)
plt.xlabel("Mother's Age")
plt.ylabel("Father's Age")
plt.title("My useful title")









    Out[46]:





<matplotlib.text.Text at 0x7f056a451438>



In [47]:

    
fig = plt.figure()
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])



In [49]:

    
fig = plt.figure()
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])

axes.scatter(births.mAge, births.fAge)
axes.set_xlabel("Age of Mother")
axes.set_ylabel("Age of Father")

pass



In [57]:

    
fig = plt.figure(figsize=(6,6))
axes = fig.add_axes([0.1, 0.1, 0.5, 0.5])

axes.scatter(births.mAge, births.fAge)
axes.set_xlabel("Age of Mother")
axes.set_ylabel("Age of Father")

rightax = fig.add_axes([0.7, 0.1, 0.25, 0.5])
rightax.hist(births.mAge, normed=True,orientation="horizontal" )


above = fig.add_axes([0.1, 0.7, 0.5, 0.25])
above.hist(births.mAge, normed=True)
above.set_xlim(10,50)

pass



In [ ]:

		fAge	gained	mAge	visits	weeks	weight
smoke
nonsmoker	count	85.000000	99.000000	100.000000	99.000000	100.000000	100.000000
	mean	29.811765	32.545455	26.900000	11.858586	38.550000	7.179500
	std	6.182722	15.231059	6.342895	3.490541	2.875796	1.434152
	min	17.000000	0.000000	15.000000	2.000000	26.000000	1.630000
	25%	25.000000	23.000000	22.000000	10.000000	38.000000	6.702500
	50%	30.000000	31.000000	25.000000	12.000000	39.000000	7.440000
	75%	35.000000	40.000000	32.000000	14.500000	40.000000	8.060000
	max	47.000000	85.000000	41.000000	19.000000	44.000000	10.130000
smoker	count	34.000000	49.000000	50.000000	50.000000	50.000000	50.000000
	mean	29.705882	32.265306	26.000000	10.800000	38.540000	6.779000
	std	6.147330	16.646791	5.993193	3.843893	2.500694	1.597415
	min	20.000000	0.000000	16.000000	3.000000	32.000000	1.690000
	25%	25.000000	22.000000	21.250000	8.000000	38.000000	6.220000
	50%	29.000000	30.000000	25.500000	11.500000	39.000000	6.970000
	75%	33.000000	40.000000	30.000000	14.000000	40.000000	7.810000
	max	46.000000	75.000000	39.000000	17.000000	44.000000	9.130000

	fAge	mAge	weeks	premature	visits	gained	weight	sexBaby	smoke
0	31	30	39	full term	13	1	6.88	male	smoker
1	34	36	39	full term	5	35	7.69	male	nonsmoker
2	36	35	40	full term	12	29	8.88	male	nonsmoker
3	41	40	40	full term	13	30	9.00	female	nonsmoker
4	42	37	40	full term	NaN	10	7.94	male	nonsmoker
5	37	28	40	full term	12	35	8.25	male	smoker
6	35	35	28	premie	6	29	1.63	female	nonsmoker
7	28	21	35	premie	9	15	5.50	female	smoker
8	22	20	32	premie	5	40	2.69	male	smoker
9	36	25	40	full term	13	34	8.75	female	nonsmoker