In [1]:
"""
My code is even better than the author's code (at least this is true for Chapter 1)
Let's create more fun in statistics learning!
"""
import pandas as pd
import numpy as np

In [5]:
# Load the two 2002 CSV extracts that sit next to the notebook.
# The pregnancy file holds one row per pregnancy (a caseid can repeat).
preg_raw = pd.read_csv(filepath_or_buffer="2002FemPregOut.csv")
# Presumably the respondent-level file, judging by its name.
# NOTE(review): `reps_raw` is not used in the cells shown here and the name
# looks like a typo for "resp" -- confirm before renaming.
reps_raw = pd.read_csv(filepath_or_buffer="2002FemRespOut.csv")

In [6]:
# Show the first rows of the pregnancy frame to sanity-check the load.
preg_raw.head()


Out[6]:
caseid nbrnaliv babysex birthwgt_lb birthwgt_oz prglngth outcome birthord agepreg finalwgt
0 1 1.0 1.0 8.0 13.0 39 1 1.0 3316.0 6448.271112
1 1 1.0 2.0 7.0 14.0 39 1 2.0 3925.0 6448.271112
2 2 3.0 1.0 9.0 2.0 39 1 1.0 1433.0 12999.542264
3 2 1.0 2.0 7.0 0.0 39 1 2.0 1783.0 12999.542264
4 2 1.0 2.0 6.0 3.0 39 1 3.0 1833.0 12999.542264

In [9]:
# Per-column count of missing values, followed by the frame's (rows, columns).
# nbrnaliv and birthord are both missing on 4445 rows -- presumably the
# pregnancies that did not end in a live birth (TODO confirm against `outcome`).
# print() with a single argument prints the same text on Python 2 and Python 3,
# whereas the Python 2-only `print x` statement is a SyntaxError on Python 3.
print(preg_raw.isnull().sum())
print(preg_raw.shape)


caseid            0
nbrnaliv       4445
babysex        4449
birthwgt_lb    4449
birthwgt_oz    4506
prglngth          0
outcome           0
birthord       4445
agepreg         352
finalwgt          0
dtype: int64
(13593, 10)

In [10]:
# Keep only the rows that have no missing value in any column. The printed
# shape shows 9087 of the 13593 rows survive, i.e. 4506 rows are dropped.
# NOTE(review): only 4445 rows lack a birth order, so 4506 - 4445 = 61 rows
# with a valid birthord are discarded just because another column (e.g.
# birthwgt_oz) is missing. If only birthord/prglngth are needed downstream,
# dropna(subset=['birthord', 'prglngth']) would keep them.
# NOTE(review): this rebinds `preg_raw`, so the unfiltered frame is gone until
# the load cell is re-run, and earlier cells re-run afterwards will show
# different numbers.
preg_raw = preg_raw.dropna()
# print() with a single argument works on both Python 2 and Python 3.
print(preg_raw.shape)


(9087, 10)

In [15]:
# Distinct birth-order values that remain after dropping incomplete rows.
# birthord is the position of a birth within one respondent's history, not a
# count of births during 2002: in the head() output caseid 2 has birthord
# 1, 2 and 3 at different agepreg values. A value of 10 therefore means that
# some respondent reported a tenth birth over her lifetime.
preg_raw['birthord'].unique()


Out[15]:
array([  1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.])

In [25]:
# Summarise pregnancy length per birth order (mean, median, sample variance),
# listed from the longest mean pregnancy to the shortest.
# A NaN variance means the group holds a single pregnancy, so the figures for
# the highest birth orders rest on very few observations.
grouped_prglngth_ct = (
    preg_raw
    .groupby('birthord')['prglngth']
    .agg(['mean', 'median', 'var'])
    .sort_values(by='mean', ascending=False)
)
grouped_prglngth_ct


Out[25]:
mean median var
birthord
1.0 38.606254 39.0 7.809997
2.0 38.572527 39.0 6.629441
3.0 38.520000 39.0 5.972026
4.0 38.441247 39.0 7.612525
5.0 38.392000 39.0 6.127355
7.0 38.105263 39.0 5.432749
6.0 38.040816 39.0 21.498299
8.0 37.571429 39.0 7.952381
9.0 37.500000 37.5 4.500000
10.0 36.000000 36.0 NaN

In [ ]:
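## First-borns appear to take slightly longer to arrive, but the gap is tiny: their mean is only
## about 0.03 weeks (roughly 6 hours, assuming prglngth is in weeks) above that of second-borns.
## The median pregnancy length of first-borns (39) is the same as for 2nd- to 8th-borns.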