In [7]:
import thinkstats2
import thinkplot
import nsfg
import first
import numpy as np

In [8]:
# Load the frame returned by nsfg.ReadFemPreg(); later cells read its
# birthwgt_lb, agepreg and totalwgt_lb columns.
df = nsfg.ReadFemPreg()

In [9]:
# Display the birthwgt_lb column; the output below contains NaN entries.
df.birthwgt_lb


Out[9]:
0      8
1      7
2      9
3      7
4      6
5      8
6      9
7      8
8      7
9      6
10     7
11     7
12     4
13   NaN
14   NaN
...
13578     6
13579     7
13580   NaN
13581     6
13582   NaN
13583   NaN
13584     6
13585   NaN
13586   NaN
13587   NaN
13588     6
13589   NaN
13590   NaN
13591     7
13592     7
Name: birthwgt_lb, Length: 13593, dtype: float64

In [4]:
# Display the agepreg column (fractional values, e.g. 33.16).
df.agepreg


Out[4]:
0     33.16
1     39.25
2     14.33
3     17.83
4     18.33
5     27.00
6     28.83
7     30.16
8     28.08
9     32.33
10    25.75
11    23.00
12    24.58
13    29.83
14    27.50
...
13578    24.00
13579    25.91
13580    28.25
13581    30.66
13582    33.25
13583    23.66
13584    26.91
13585    21.41
13586    22.41
13587    23.41
13588    17.91
13589    18.50
13590    19.75
13591    21.58
13592    21.58
Name: agepreg, Length: 13593, dtype: float64

In [ ]:
# Bin the records by agepreg in 3-unit-wide bins with edges 10, 13, ..., 46.
#
# Only rows that have BOTH agepreg and totalwgt_lb are grouped, so the
# per-group mean age and the per-group weight distribution computed from
# `groups` describe the same records. When the notebook is run top to
# bottom, `df` at this point is the unfiltered ReadFemPreg() frame; if `df`
# has already been filtered the dropna below is a no-op.
df_complete = df.dropna(subset=['agepreg', 'totalwgt_lb'])

bins = np.arange(10, 48, 3)
# indices[k] is the bin number of row k: 0 means below the first edge,
# len(bins) means at or above the last edge.
indices = np.digitize(df_complete.agepreg, bins)
groups = df_complete.groupby(indices)

In [ ]:
# Walk the groupby once and keep just the per-group frames (keys unused).
grouped_frames = [frame for _key, frame in groups]

# One mean agepreg and one Cdf of totalwgt_lb per group; [1:-1] then drops
# the first and last groups yielded by the groupby.
# NOTE(review): groupby yields only non-empty bins, so which age bins get
# trimmed depends on the data -- confirm this is the intended trimming.
ages = [frame.agepreg.mean() for frame in grouped_frames][1:-1]
cdfs = [thinkstats2.Cdf(frame.totalwgt_lb) for frame in grouped_frames][1:-1]

In [ ]:
# Draw one line per percentile: x is the mean agepreg of each group,
# y is that percentile of the group's totalwgt_lb Cdf.
percents = [75, 50, 25]

# Tell thinkplot how many lines are coming (one per percentile).
thinkplot.PrePlot(len(percents))
for percent in percents:
    label = '%dth' % percent
    weights = [cdf.Percentile(percent) for cdf in cdfs]
    thinkplot.Plot(ages, weights, label=label)

In [ ]:
# Label the axes and display the figure.
# NOTE(review): with an inline matplotlib backend figures are rendered per
# cell, so this call may need to share a cell with the plotting loop -- confirm.
thinkplot.Show(xlabel="Age (years)", ylabel="Weight (lbs)")

In [14]:
# first.MakeFrames() returns three frames; only `live` is used below.
live, firsts, others = first.MakeFrames()
# Rebinds `df` (previously the ReadFemPreg() frame) to the rows of `live`
# that have both agepreg and totalwgt_lb; the correlation cells use this df.
df = live.dropna(subset=['agepreg', 'totalwgt_lb'])

In [15]:
# Correlation between agepreg and totalwgt_lb on the filtered frame.
# Presumably Pearson's correlation -- confirm against thinkstats2.Corr.
thinkstats2.Corr(df.agepreg, df.totalwgt_lb)


Out[15]:
0.068833970354109056

In [16]:
# Spearman correlation between agepreg and totalwgt_lb on the same
# filtered frame, for comparison with the Corr result.
thinkstats2.SpearmanCorr(df.agepreg, df.totalwgt_lb)


Out[16]:
0.094610041096582262

In [ ]: