In [7]:
import thinkstats2
import thinkplot
import nsfg
import first
import numpy as np
In [8]:
# Load the data set through the project's nsfg helper and keep it as `df`.
# Presumably one row per pregnancy, returned as a pandas DataFrame (later
# cells use column access and groupby on it) -- confirm against nsfg.py.
df = nsfg.ReadFemPreg()
In [9]:
# Display the `birthwgt_lb` column as the cell output.
df['birthwgt_lb']
Out[9]:
In [4]:
# Display the `agepreg` column as the cell output.
df['agepreg']
Out[4]:
In [ ]:
# Bin edges every 3 units from 10 up to 46 (the stop value 48 is exclusive).
bins = np.arange(10, 48, 3)

# np.digitize maps each `agepreg` value to the index of the bin it falls in:
# 0 means "below bins[0]" and len(bins) means "at or above bins[-1]".
# NOTE(review): missing (NaN) ages appear to land in the len(bins) overflow
# index as well -- confirm before relying on that last group.
indices = np.digitize(x=df['agepreg'], bins=bins)

# Group the rows of `df` by their bin index.
groups = df.groupby(by=indices)
In [ ]:
# For every age group, pair the mean `agepreg` with a Cdf of `totalwgt_lb`.
group_summaries = [
    (group['agepreg'].mean(), thinkstats2.Cdf(group['totalwgt_lb']))
    for _bin_index, group in groups
]

# Discard the first and the last group, then split the pairs back into the
# two parallel lists used for plotting.
trimmed_summaries = group_summaries[1:-1]
ages = [mean_age for mean_age, _weight_cdf in trimmed_summaries]
cdfs = [weight_cdf for _mean_age, weight_cdf in trimmed_summaries]
In [ ]:
# Plot three percentile curves of weight against age, one point per age group.
# The argument 3 matches the number of curves drawn by the loop below.
thinkplot.PrePlot(3)
for percent in [75, 50, 25]:
    # For each group's Cdf, look up the weight at this percentile rank.
    weights = [cdf.Percentile(percent) for cdf in cdfs]
    # Legend entry such as '75th'.
    label = '%dth' % percent
    # x values are the per-group mean ages; `ages` and `cdfs` are parallel lists.
    thinkplot.Plot(ages, weights, label=label)
In [ ]:
# Hand the axis labels to thinkplot.Show as keyword options.
# NOTE(review): this call sits in a different cell from the plotting loop;
# confirm the active backend still attaches it to the same figure.
axis_labels = {'xlabel': "Age (years)", 'ylabel': "Weight (lbs)"}
thinkplot.Show(**axis_labels)
In [14]:
# first.MakeFrames() returns three objects; only `live` is used in this cell.
live, firsts, others = first.MakeFrames()

# Keep only the rows of `live` that have a value in both analysis columns.
# Note that this rebinds `df`, replacing the frame loaded earlier in the
# notebook, so cells that use `df` depend on execution order.
required_columns = ['agepreg', 'totalwgt_lb']
df = live.dropna(subset=required_columns)
In [15]:
# Correlation between `agepreg` and `totalwgt_lb`, computed by
# thinkstats2.Corr and shown as the cell output.
age_series = df['agepreg']
weight_series = df['totalwgt_lb']
thinkstats2.Corr(age_series, weight_series)
Out[15]:
In [16]:
# Same pair of columns as the previous correlation, this time passed to
# thinkstats2.SpearmanCorr; the result is shown as the cell output.
age_series = df['agepreg']
weight_series = df['totalwgt_lb']
thinkstats2.SpearmanCorr(age_series, weight_series)
Out[16]:
In [ ]: