In [1]:
import matplotlib.pyplot as plt
import metapack as mp
import pandas as pd
import numpy as np
import seaborn as sns
# Apply seaborn's global plot settings, with the color_codes option enabled.
sns.set(color_codes=True)
# Render matplotlib figures inline in the notebook's cell output.
%matplotlib inline
In [2]:
# Load the IPython extension module metapack.jupyter.magic into this kernel.
%load_ext metapack.jupyter.magic
In [3]:
# Open the package via metapack's Jupyter helper; later cells resolve their
# data references through `pkg`.
pkg = mp.jupyter.open_source_package()
# Bare expression: display the package object as this cell's output.
pkg
Out[3]:
In [5]:
# Look up the 'cnss_2017_dta' reference in the package, then take the
# filesystem path of its resolved URL; `fp` is what pd.read_stata reads below.
cnss_ref = pkg.reference('cnss_2017_dta')
fp = cnss_ref.resolved_url.fspath
In [8]:
# The variable KRq1 (Number of Dogs) has a duplicate-label problem when the file
# is converted with StatTransfer: StatTransfer separates the number from the
# text "dogs", and pandas interprets this as multiple labels with the value
# "dogs". The easiest way to handle this is to drop the variable on load.
#
# The reader is opened in a `with` block so the underlying file handle is
# closed as soon as the metadata has been read, rather than staying open for
# the lifetime of the kernel.
with pd.read_stata(fp, iterator=True) as itr:
    # Variable name -> variable label, in the file's column order.
    # Still includes KRq1; the label listing further down filters it out.
    labels = dict(itr.variable_labels())

# Every variable except KRq1, preserving the file's column order. The names
# come from the public variable_labels() mapping rather than from reader
# attributes.
columns = [name for name in labels if name != 'KRq1']
df = pd.read_stata(fp, columns=columns)
df.head()
Out[8]:
In [25]:
# Integer category codes backing the categorical `church` column.
df['church'].cat.codes
Out[25]:
In [27]:
# Value of `church` for the row labelled 974, fetched in a single .loc lookup.
df.loc[974, 'church']
Out[27]:
In [9]:
# List every variable as "<name><TAB><label>", one per line, skipping KRq1
# (the variable excluded when the data frame was loaded).
label_lines = [
    '{}\t{}'.format(var_name, var_label)
    for var_name, var_label in labels.items()
    if var_name != 'KRq1'
]
print('\n'.join(label_lines))
In [ ]: