In [1]:
import pandas as pd

In [2]:
import fiona
from shapely.geometry import shape

In [3]:
import sys
sys.path.append('..')

In [4]:
from pummeler.geocode_data import geocode_data

In [5]:
def _load_region_lookup(table_name):
    """Return the ``region`` field of a geocode table as a plain dict.

    ``geocode_data`` is a project helper; presumably it returns a pandas
    DataFrame indexed by county identifier -- TODO confirm against
    ``pummeler.geocode_data``.
    """
    table = geocode_data(table_name)
    return table.region.to_dict()


# One lookup per table; the '00' / '10' suffixes mirror the table names.
county_region_00 = _load_region_lookup('county_region_00')
county_region_10 = _load_region_lookup('county_region_10')

In [6]:
from collections import defaultdict

In [7]:
# Collect, for each region identifier, the list of county geometries that
# belong to it -- once using the '00' county->region lookup and once using
# the '10' lookup.
region_shapes_00 = defaultdict(list)
region_shapes_10 = defaultdict(list)

# Open the shapefile in a ``with`` block so the underlying dataset handle is
# closed once all features have been read (the bare ``fiona.open`` call in a
# ``for`` header never closes it).
with fiona.open('UScounties') as counties:
    for county in counties:
        shp = shape(county['geometry'])
        fips = county['properties']['FIPS']
        if county['properties']['STATE_NAME'] == 'Alaska':
            # Every Alaska county is assigned to one fixed region per lookup
            # instead of going through the county->region tables.
            # NOTE(review): presumably the tables do not cover Alaska
            # counties individually -- confirm.
            region_shapes_00['AK_00_01'].append(shp)
            region_shapes_10['AK_10_01'].append(shp)
        else:
            # A FIPS code missing from either lookup raises KeyError here.
            region_shapes_00[county_region_00[fips]].append(shp)
            region_shapes_10[county_region_10[fips]].append(shp)

In [8]:
from shapely.ops import cascaded_union

In [9]:
def _region_centroids(region_shapes):
    """Build a table with one centroid per region.

    Parameters
    ----------
    region_shapes : dict
        Maps a region identifier to the list of shapely geometries that
        make up that region.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``region`` (sorted), with columns ``lon`` and ``lat``
        holding the x and y coordinates of the centroid of the union of the
        region's geometries.
        NOTE(review): the lon/lat naming assumes the shapefile uses a
        geographic (degrees) coordinate system -- confirm.
    """
    # ``.items()`` works on both Python 2 and 3; ``.iteritems()`` is
    # Python 2 only.
    records = [
        (region,) + cascaded_union(shapes).centroid.coords[0]
        for region, shapes in region_shapes.items()
    ]
    frame = pd.DataFrame.from_records(
        records, columns=['region', 'lon', 'lat'], index='region')
    # ``sort_index()`` replaces ``sortlevel()``, which was deprecated and
    # later removed from pandas; for a single-level index they sort the same.
    return frame.sort_index()


centroids00 = _region_centroids(region_shapes_00)
centroids10 = _region_centroids(region_shapes_10)

In [10]:
# Store both centroid tables in the package's regions HDF5 file, each under
# its own key, in 'table' format with blosc compression at level 9.
fn = '../pummeler/data/regions.h5'
# The key is passed by keyword: newer pandas deprecates positional arguments
# to ``to_hdf`` other than the path, and ``key=`` works on old versions too.
centroids00.to_hdf(fn, key='centroids00', format='table', complib='blosc', complevel=9)
centroids10.to_hdf(fn, key='centroids10', format='table', complib='blosc', complevel=9)