In [1]:
from __future__ import print_function, division
import nsfg3
In [15]:
import pandas
import numpy as np
def ReadFemResp1995(dat_file='1995FemRespData.dat.gz'):
    """Reads respondent data from NSFG Cycle 5.

    dat_file: path of the gzip-compressed, fixed-width respondent file

    returns: DataFrame with columns cmintvw, timesmar, cmmarrhx, cmbirth,
             finalwgt and the derived boolean column evrmarry, after
             nsfg3.CleanData has been applied to it
    """
    # (column name, first position, last position); positions are 1-based
    # and inclusive, exactly as they were written in the original colspecs.
    fields = [
        ('cmintvw', 12360, 12363),
        ('timesmar', 4637, 4638),
        ('cmmarrhx', 11759, 11762),
        ('cmbirth', 14, 16),
        ('finalwgt', 12350, 12359),
    ]
    names = [name for name, _, _ in fields]
    # read_fwf expects 0-based half-open intervals, hence `first - 1`.
    colspecs = [(first - 1, last) for _, first, last in fields]

    df = pandas.read_fwf(dat_file,
                         compression='gzip',
                         colspecs=colspecs,
                         names=names)

    # Recode 98 and 99 as missing.  Assign the result back to the column:
    # `df.timesmar.replace(..., inplace=True)` mutates a temporary Series and
    # leaves `df` untouched when pandas Copy-on-Write is active.
    df['timesmar'] = df['timesmar'].replace([98, 99], np.nan)

    # NaN > 0 evaluates to False, so rows with a missing timesmar are
    # flagged evrmarry == False.
    df['evrmarry'] = (df.timesmar > 0)

    # The return value of CleanData is ignored here; presumably it modifies
    # `df` in place -- confirm against nsfg3.
    nsfg3.CleanData(df)
    return df
In [16]:
# Load the Cycle 5 respondent frame once; every cell below reads from `df`.
df = ReadFemResp1995()
In [17]:
# Number of respondents whose evrmarry flag is False.  The vectorized
# Series.sum() avoids iterating the column element by element in Python;
# int() keeps the displayed value a plain Python integer.
int((~df.evrmarry).sum())
Out[17]:
In [18]:
# Count respondents with 604 <= cmbirth <= 720 (both bounds inclusive).
# NOTE(review): cmbirth looks like a century-month code -- confirm what
# window 604-720 represents against the NSFG codebook.
born_in_window = df.cmbirth.between(604, 720)
len(df[born_in_window])
Out[18]:
In [19]:
# Count respondents with 780 <= cmmarrhx <= 840 (both bounds inclusive).
# NOTE(review): cmmarrhx looks like a century-month code -- confirm what
# window 780-840 represents against the NSFG codebook.
married_in_window = df.cmmarrhx.between(780, 840)
len(df[married_in_window])
Out[19]:
In [20]:
# Frequency of each cmintvw value, ordered by value rather than by count.
cmintvw_counts = df['cmintvw'].value_counts()
cmintvw_counts.sort_index()
Out[20]:
In [21]:
# Frequency of each finalwgt value, ordered by value rather than by count.
finalwgt_counts = df['finalwgt'].value_counts()
finalwgt_counts.sort_index()
Out[21]:
In [22]:
# Frequency of each timesmar value, ordered by value rather than by count.
# value_counts() drops NaN by default, so the rows recoded from 98/99 to
# missing do not appear in this table.
timesmar_counts = df['timesmar'].value_counts()
timesmar_counts.sort_index()
Out[22]:
In [ ]: