In [ ]:
%load_ext autoreload
In [ ]:
%autoreload 2
In [ ]:
from __future__ import absolute_import, division, print_function
from builtins import (
ascii, bytes, chr, dict, filter, hex, input, int, map,
next, oct, open, pow, range, round, str, super, zip)
# Standard library imports
import os
from functools import partial
from math import pi
import json
from collections import defaultdict
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
# Imports for working with shapefiles
import pyproj
from shapely.geometry import (
shape,
MultiPolygon,
mapping
)
from shapely.ops import (
transform,
cascaded_union
)
import fiona
from fiona.crs import from_epsg
# matplotlib imports
import matplotlib.pyplot as plt
from matplotlib.colors import to_rgb, to_hex
from matplotlib import cm
%matplotlib inline
# local imports
from src.modelling.clustering import SameSizeKMeans
from src.output.shapes import (
generate_colors,
plot_shapes,
generate_shapefiles,
geojson_from_shapefile
)
In [ ]:
# Create pandas dataframes that have information about each blockgroup
poptot_df = pd.read_csv('data/block_groups/pop_tot/DEC_10_SF1_P1_with_ann.csv')
poptot_df = poptot_df[['GEO.id2', 'D001']]
poptot_df.columns = ['geoid', 'poptot']
poptot_df.drop(0, axis=0, inplace=True)
poptot_df.set_index('geoid', inplace=True)
pop18_df = pd.read_csv('data/block_groups/pop18/DEC_10_SF1_P10_with_ann.csv')
pop18_df = pop18_df[['GEO.id2', 'D001', 'D003']]
pop18_df.columns = ['geoid', 'pop18', 'pop18wht']
pop18_df.drop(0, axis=0, inplace=True)
pop18_df.set_index('geoid', inplace=True)
In [ ]:
wisc_census_blocks = 'data/block_groups/shapes/tl_2013_55_bg.shp'
# A convenience object for projecting lat/long values
# from EPSG 4326 to 3695 (approximate xy mappings for
# central Wisconsin)
project = partial(
pyproj.transform,
pyproj.Proj(init='epsg:4326'),
pyproj.Proj(init='epsg:3695')
)
In [ ]:
# Create a list of blockgroups, which has a shape, a geoid,
# and an untransformed shape
with fiona.open(wisc_census_blocks) as f:
blocks = [
{
'shape': transform(project, shape(block['geometry'])),
'geoid': block['properties']['GEOID'],
'untransformed': shape(block['geometry'])
}
for block in f
]
In [ ]:
# Assign information for each block from the above dataframes
for block in blocks:
block['centroid'] = np.array([coord[0] for coord in block['shape'].centroid.xy])
block['pop18'] = int(pop18_df.loc[block['geoid']]['pop18'])
block['pop18wht'] = int(pop18_df.loc[block['geoid']]['pop18wht'])
block['poptot'] = int(poptot_df.loc[block['geoid']]['poptot'])
In [ ]:
# Create shapes of the whole of Wisconsin to be used
# as cutouts for final districts to improve the quality
# of their presentation
with fiona.open('data/districts/congressional/Wisconsin_Congressional_Districts.shp') as f:
wisconsin = cascaded_union([transform(project, shape(district['geometry'])) for district in f])
wisc_untrans = cascaded_union([shape(district['geometry']) for district in f])
# Long, Lat bounds of the state for future reference
print(wisc_untrans.bounds)
In [ ]:
# Create variable which is the ideal district population
# for a Congressional district in Wisconsin
total_wisc_pop = np.sum([block['poptot'] for block in blocks])
ideal_dist_pop = total_wisc_pop/8
In [ ]:
def cleanup_blocks(blocks, labels):
dist_blocks = {
label: [block for block in blocks
if block['label'] == label]
for label in np.unique(labels)
}
for label in dist_blocks:
_blocks = dist_blocks[label]
for block in _blocks:
if block['iscontiguous']:
continue
for other_block in _blocks:
if block['geoid'] != other_block['geoid']:
if block['shape'].touches(other_block['shape']):
block['iscontiguous'] = True
other_block['iscontiguous'] = True
break
else:
block['iscontiguous'] = False
for block in blocks:
votes = defaultdict(int)
if block['iscontiguous']:
continue
for other_block in blocks:
if other_block['geoid'] == block['geoid']:
continue
if other_block['shape'].touches(block['shape']):
votes[other_block['label']] +=1
new_label = max(votes, key=votes.get)
block['label'] = new_label
In [ ]:
coord_array = np.array([block['centroid'] for block in blocks])
weight_array = np.array([block['poptot'] for block in blocks])
In [ ]:
sskmeans = SameSizeKMeans(n_clusters=8, save_labels=True)
sskmeans.fit(
coord_array, weights=weight_array,
weight_tol=2e-3, order='s')
In [ ]:
all_labels = sskmeans.all_labels_
In [ ]:
# EPSG specification to use - lat/long
epsg_spec = 4326
for step, labels in enumerate(all_labels):
print('Starting step',step)
# Assign labels to each blockgroup for current step
for label, block in zip(labels, blocks):
block['label'] = int(label)
block['iscontiguous'] = False
# Make blockgroups contiguous by label
while not all([block['iscontiguous'] for block in blocks]):
print('Making blocks contiguous')
cleanup_blocks(blocks, labels)
# Create a dictionary of districts by label, and assign
# their (untransformed) shape to be the union of all the
# (untransformed) shapes of the blocks with that label
districts = [{} for label in np.unique(labels)]
for label in np.unique(labels):
districts[label]['shape'] = cascaded_union([
block['shape']
for block in blocks
if block['label'] == label
])
districts[label]['untransformed'] = cascaded_union([
block['untransformed']
for block in blocks
if block['label'] == label
])
districts[label]['id'] = int(label) + 1
# Assign population totals, compactness, etc. to each district
for label in np.unique(labels):
# population total
_tot_pop = np.sum([
block['poptot']
for block in blocks
if block['label'] == label
])
districts[label]['poptot'] = _tot_pop
# total voting age population
_tot_pop18 = np.sum([
block['pop18']
for block in blocks
if block['label'] == label
])
districts[label]['pop18'] = _tot_pop18
# total white voting age population
_tot_pop18wht = np.sum([
block['pop18wht']
for block in blocks
if block['label'] == label
])
districts[label]['pop18wht'] = _tot_pop18wht
# District compactness
_area = districts[label]['shape'].area/(1000)**2
_perimeter = districts[label]['shape'].length/1000
districts[label]['compactness'] = (4*pi*_area/_perimeter**2)
# scaled population difference from ideal
_pop_diff = _tot_pop - ideal_dist_pop
districts[label]['popdiff'] = (1 + _pop_diff/ideal_dist_pop)/2
# Categorical district colors
hex_dist_colors = {
district['id']: to_hex(cm.Set1(district['id'] - 1))
for district in districts
}
# Compactness district colors with reference to a
# 3x2 rectangle
ref_aspect = 1.5
reference = 4*pi*(ref_aspect)/(2*(1+ref_aspect))**2
cmpct_hex_colors = {
district['id']: to_hex(cm.Reds_r(district['compactness']/reference))
for district in districts
}
# Population difference colors
popdiff_hex_colors = {
district['id']: to_hex(cm.seismic(district['popdiff']))
for district in districts
}
# Set the schema for the shapefile
schema = {
'type': 'Feature',
'geometry': 'Polygon',
'properties': {
'id': 'int',
'id_color': 'str',
'cmpctness': 'float',
'cmpct_col': 'str',
'popdiff': 'float',
'pdiff_col': 'str',
'poptot': 'int',
'pop18': 'int',
'pop18wht': 'int'
}
}
# Set the values associated for each field of the schema
# for each district in districts
all_schema_values = [
{
'type': 'Feature',
'geometry': mapping(district['untransformed'].intersection(wisc_untrans)),
'properties': {
'id': district['id'],
'id_color': hex_dist_colors[district['id']],
'cmpctness': district['compactness'],
'cmpct_col': cmpct_hex_colors[district['id']],
'popdiff': district['popdiff'],
'pdiff_col': popdiff_hex_colors[district['id']],
'poptot': district['poptot'],
'pop18': district['pop18'],
'pop18wht': district['pop18wht']
}
}
for district in districts
]
# File location to write the shapefiles to
file_folder = 'data/districts/generated/sskmeans{}'.format(step)
# Make shapefiles from districts according to all_schema_values
print('Writing shapefiles')
generate_shapefiles(
districts, file_folder, schema,
all_schema_values, epsg_spec
)
# Generate geojson from shapefiles for consumption in bokeh
print('Creating GeoJSON')
shapefile_location = os.path.join(
file_folder, 'sskmeans{}.shp'.format(step)
)
geojson_target = 'src/static/geojson/sskmeans{}.json'.format(step)
geojson_from_shapefile(
shapefile_location, geojson_target, 4e-3
)
In [ ]:
labels = all_labels[0]
for label, block in zip(labels, blocks):
block['label'] = int(label)
block['iscontiguous'] = False
# Make blockgroups contiguous by label
while not all([block['iscontiguous'] for block in blocks]):
print('Making blocks contiguous')
cleanup_blocks(blocks, labels)
# Create a dictionary of districts by label, and assign
# their (untransformed) shape to be the union of all the
# (untransformed) shapes of the blocks with that label
districts = [{} for label in np.unique(labels)]
for label in np.unique(labels):
districts[label]['shape'] = cascaded_union([
block['shape']
for block in blocks
if block['label'] == label
])
districts[label]['untransformed'] = cascaded_union([
block['untransformed']
for block in blocks
if block['label'] == label
])
districts[label]['id'] = int(label) + 1
# Assign population totals, compactness, etc. to each district
for label in np.unique(labels):
# population total
_tot_pop = np.sum([
block['poptot']
for block in blocks
if block['label'] == label
])
districts[label]['poptot'] = _tot_pop
# scaled population difference from ideal
_pop_diff = _tot_pop - ideal_dist_pop
districts[label]['popdiff'] = (1 + _pop_diff/ideal_dist_pop)/2
In [ ]:
popdiff_colors = [
cm.seismic(district['popdiff'])
for district in districts
]
plot_shapes(
districts, popdiff_colors, alpha=1,
fig_file='images/kmeans_popdiff.png',
cutout = wisconsin)
In [ ]:
fig, ax = plt.subplots(1,1, figsize=(6,0.5))
fig.subplots_adjust(
left=None, bottom=0.5, right=None,
top=1, wspace=None, hspace=None
)
ax.bar(sampling, np.ones(sampling.shape), width=0.001, color=legend)
ax.set_xlim(0,1)
ax.set_xticks([0, 0.5, 0.995])
ax.set_xticklabels(['-Ideal Population', '0', '+Ideal Population'])
ax.set_yticks([])
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(False)
fig.savefig('images/popdiff_legend.png')
In [ ]:
for step, labels in enumerate(all_labels):
print('Starting step',step)
# Assign labels to each blockgroup for current step
for label, block in zip(labels, blocks):
block['label'] = int(label)
block['iscontiguous'] = False
# Make blockgroups contiguous by label
while not all([block['iscontiguous'] for block in blocks]):
print('Making blocks contiguous')
cleanup_blocks(blocks, labels)
# Create a dictionary of districts by label, and assign
# their (untransformed) shape to be the union of all the
# (untransformed) shapes of the blocks with that label
districts = [{} for label in np.unique(labels)]
for label in np.unique(labels):
districts[label]['shape'] = cascaded_union([
block['shape']
for block in blocks
if block['label'] == label
])
districts[label]['id'] = int(label) + 1
# Categorical district colors
dist_colors = [
cm.Set1(district['id'] - 1)
for district in districts
]
file_location = 'images/sskmeans_s/sskmeans_step{}.png'.format(step)
plot_shapes(
districts, dist_colors, alpha=1,
fig_file=file_location, cutout = wisconsin)
In [ ]: