In [ ]:
%load_ext autoreload
In [ ]:
%autoreload 2
In [ ]:
from __future__ import absolute_import, division, print_function
from builtins import (
ascii, bytes, chr, dict, filter, hex, input, int, map,
next, oct, open, pow, range, round, str, super, zip)
# Standard library imports
import os
from functools import partial
from math import pi
import json
from collections import defaultdict
import random
import numpy as np
import pandas as pd
import networkx as nx
# Imports for working with shapefiles
import pyproj
from shapely.geometry import (
shape,
MultiPolygon,
mapping
)
from shapely.ops import (
transform,
cascaded_union
)
import fiona
from fiona.crs import from_epsg
# local imports
from src.modelling.input import shapes_to_graph
from src.modelling.clustering import SSGraphKMeans
In [ ]:
%matplotlib inline
In [ ]:
random.sample(range(4), 4)
In [ ]:
# Create pandas dataframes that have information about each blockgroup
poptot_df = pd.read_csv('data/block_groups/pop_tot/DEC_10_SF1_P1_with_ann.csv')
poptot_df = poptot_df[['GEO.id2', 'D001']]
poptot_df.columns = ['geoid', 'poptot']
poptot_df.drop(0, axis=0, inplace=True)
poptot_df.set_index('geoid', inplace=True)
poptot_df['poptot'] = poptot_df['poptot'].apply(int)
In [ ]:
wisc_census_blocks = 'data/block_groups/shapes/tl_2013_55_bg.shp'
# A convenience object for projecting lat/long values
# from EPSG 4326 to 3695 (approximate xy mappings for
# central Wisconsin)
project = partial(
pyproj.transform,
pyproj.Proj(init='epsg:4326'),
pyproj.Proj(init='epsg:3695')
)
In [ ]:
# Create a list of blockgroups, which has a shape, a geoid,
# and an untransformed shape
with fiona.open(wisc_census_blocks) as f:
blocks = [
{
'shape': shape(block['geometry']),
'geoid': block['properties']['GEOID']
}
for block in f
]
In [ ]:
blocks_graph = shapes_to_graph(blocks)
In [ ]:
block_geoids = [
block['geoid']
for block in blocks
]
In [ ]:
block_populations = {
geoid: poptot_df.to_dict()['poptot'][geoid]
for geoid in block_geoids
}
In [ ]:
np.random.seed(3434)
sample_node = np.random.choice(block_geoids)
In [ ]:
sample_node
In [ ]:
sample_graph = nx.ego_graph(blocks_graph, sample_node, radius=4)
In [ ]:
sample_weights = {
geoid: pop
for geoid, pop in block_populations.items()
if geoid in sample_graph
}
n_clusters_samples = 4
total_weight = sum(sample_weights.values())
ideal_weight = total_weight/n_clusters_samples
tols = np.linspace(5000, 200, 8)
In [ ]:
sample_graph_cluster = SSGraphKMeans(n_clusters=n_clusters_samples, tol=tols)
In [ ]:
sample_graph_cluster.fit(sample_graph, sample_weights)
In [ ]:
sample_graph_cluster.cluster_weights
In [ ]:
nx.draw(sample_graph.subgraph(sample_graph_cluster.clusters[2]))
In [ ]:
sum(sample_weights[node] for node in sample_graph_cluster.clusters[1])
In [ ]:
sum(sample_weights[node] for node in sample_graph_cluster.clusters[3])
In [ ]:
graph_cluster = SSGraphKMeans()
In [ ]:
graph_cluster.fit(blocks_graph, block_populations)
In [ ]:
for i in range(1, 9):
print(i, nx.is_connected(blocks_graph.subgraph(graph_cluster.clusters[i])))
In [ ]:
graph_cluster.cluster_weights
In [ ]:
'550759610002' in graph_cluster.clusters[6].border
In [ ]:
graph_cluster.clusters.keys()
In [ ]:
members_border_totals = [
(len(graph_cluster.clusters[i]), len(graph_cluster.clusters[i].border))
for i in range(1, 9)
]
In [ ]:
members_border_totals
In [ ]:
foo = list(nx.connected_component_subgraphs(blocks_graph.subgraph(graph_cluster.clusters[7])))
In [ ]:
foo[1].nodes()
In [ ]:
In [ ]: