In [35]:
import pandas as pd
from pandas import DataFrame
import census
import settings
import us
import numpy as np
from itertools import islice
# Create the Census API client, authenticated with the key stored in settings
c = census.Census(settings.CENSUS_KEY)
In [36]:
# FIPS code of every entry in us.states.STATES, collected into a NumPy array
states_fips = np.array(
    [st.fips for st in us.states.STATES]
)
In [42]:
## EXERCISE
## FILL in with your generator for all census places in the 2010 census
def places(variables="NAME", process="by_state"):
    """Generate the census-place records returned by the SF1 endpoint.

    Parameters
    ----------
    variables : str
        Comma-separated SF1 variable names to request,
        e.g. "NAME,P0010001".
    process : str
        'by_state' (default) issues one request per FIPS code in the
        module-level ``states_fips``; any other value issues a single
        request with the state wildcarded ('state:*').
        NOTE(review): confirm the API accepts a wildcard state for
        place-level queries before relying on the non-default mode.

    Yields
    ------
    Each element of the sequences returned by ``c.sf1.get`` (presumably
    one dict per place carrying the requested variables plus 'state' and
    'place' keys -- verify against the census client).
    """
    if process == 'by_state':
        for state in states_fips:
            # Interpolate the *current* state's FIPS code, not the whole array.
            state_places = c.sf1.get(variables,
                                     geo={'for': 'place:*',
                                          'in': 'state:%s' % state})
            for place in state_places:
                yield place
    else:
        # Single wildcard request, made lazily on first iteration so that
        # defining the generator never triggers a network call.
        all_places = c.sf1.get(variables,
                               geo={'for': 'place:*',
                                    'in': 'state:*'})
        for place in all_places:
            yield place
In [49]:
# Peek at the first record produced by the generator
next(places())
Out[49]:
In [47]:
# use this code to run your code
# I recommend replacing the None in islice with a small number to make sure you're on
# the right track before pulling every place
r = list(islice(places("NAME,P0010001"), None))
places_df = DataFrame(r)
# cast the population column to int so it can be summed numerically
places_df['P0010001'] = places_df['P0010001'].astype('int')
# place identifier = state code followed by place code (vectorised string concatenation)
places_df['FIPS'] = places_df['state'] + places_df['place']
# single-argument print(...) emits the same text on Python 2 and Python 3
print("number of places %s" % len(places_df))
print("total pop %s" % places_df['P0010001'].sum())
places_df.head()
Out[47]:
In [48]:
# if you've done this correctly, the following asserts should stop complaining
# total population across all places; the message reports the value actually found
assert places_df.P0010001.sum() == 228457238, \
    "total pop is %s, expected 228457238" % places_df.P0010001.sum()
# number of places in 2010 Census
assert len(places_df) == 29261, \
    "found %s places, expected 29261" % len(places_df)
In [ ]: