notebook.community

Edit and run



In [ ]:

    
%load_ext autoreload



In [ ]:

    
%autoreload 2



In [ ]:

    
from __future__ import absolute_import, division, print_function
from builtins import (
    ascii, bytes, chr, dict, filter, hex, input, int, map,
    next, oct, open, pow, range, round, str, super, zip)

# Standard library imports
import os
from functools import partial
from math import pi
import json
from collections import defaultdict
import random

import numpy as np
import pandas as pd
import networkx as nx

# Imports for working with shapefiles
import pyproj
from shapely.geometry import (
    shape,
    MultiPolygon,
    mapping
)
from shapely.ops import (
    transform,
    cascaded_union
)
import fiona
from fiona.crs import from_epsg

# local imports
from src.modelling.input import shapes_to_graph
from src.modelling.clustering import SSGraphKMeans



In [ ]:

    
%matplotlib inline



In [ ]:

    
random.sample(range(4), 4)



In [ ]:

    
# Create pandas dataframes that have information about each blockgroup
poptot_df = pd.read_csv('data/block_groups/pop_tot/DEC_10_SF1_P1_with_ann.csv')
poptot_df = poptot_df[['GEO.id2', 'D001']]
poptot_df.columns = ['geoid', 'poptot']
poptot_df.drop(0, axis=0, inplace=True)
poptot_df.set_index('geoid', inplace=True)
poptot_df['poptot'] = poptot_df['poptot'].apply(int)



In [ ]:

    
wisc_census_blocks = 'data/block_groups/shapes/tl_2013_55_bg.shp'

# A convenience object for projecting lat/long values
# from EPSG 4326 to 3695 (approximate xy mappings for
# central Wisconsin)
project = partial(
    pyproj.transform,
    pyproj.Proj(init='epsg:4326'), 
    pyproj.Proj(init='epsg:3695')
)



In [ ]:

    
# Create a list of blockgroups, which has a shape, a geoid,
# and an untransformed shape
with fiona.open(wisc_census_blocks) as f:
    blocks = [
        {
            'shape': shape(block['geometry']), 
            'geoid': block['properties']['GEOID']
        }
        for block in f
    ]



In [ ]:

    
blocks_graph = shapes_to_graph(blocks)



In [ ]:

    
block_geoids = [
    block['geoid']
    for block in blocks
]



In [ ]:

    
block_populations = {
    geoid: poptot_df.to_dict()['poptot'][geoid]
    for geoid in block_geoids
}



In [ ]:

    
np.random.seed(3434)
sample_node = np.random.choice(block_geoids)



In [ ]:

    
sample_node



In [ ]:

    
sample_graph = nx.ego_graph(blocks_graph, sample_node, radius=4)



In [ ]:

    
sample_weights = {
    geoid: pop
    for geoid, pop in block_populations.items()
    if geoid in sample_graph
}
n_clusters_samples = 4
total_weight = sum(sample_weights.values())
ideal_weight = total_weight/n_clusters_samples
tols = np.linspace(5000, 200, 8)



In [ ]:

    
sample_graph_cluster = SSGraphKMeans(n_clusters=n_clusters_samples, tol=tols)



In [ ]:

    
sample_graph_cluster.fit(sample_graph, sample_weights)



In [ ]:

    
sample_graph_cluster.cluster_weights



In [ ]:

    
nx.draw(sample_graph.subgraph(sample_graph_cluster.clusters[2]))



In [ ]:

    
sum(sample_weights[node] for node in sample_graph_cluster.clusters[1])



In [ ]:

    
sum(sample_weights[node] for node in sample_graph_cluster.clusters[3])



In [ ]:

    
graph_cluster = SSGraphKMeans()



In [ ]:

    
graph_cluster.fit(blocks_graph, block_populations)



In [ ]:

    
for i in range(1, 9):
    print(i, nx.is_connected(blocks_graph.subgraph(graph_cluster.clusters[i])))



In [ ]:

    
graph_cluster.cluster_weights



In [ ]:

    
'550759610002' in graph_cluster.clusters[6].border



In [ ]:

    
graph_cluster.clusters.keys()



In [ ]:

    
members_border_totals = [
    (len(graph_cluster.clusters[i]), len(graph_cluster.clusters[i].border))
    for i in range(1, 9)
]



In [ ]:

    
members_border_totals



In [ ]:

    
foo = list(nx.connected_component_subgraphs(blocks_graph.subgraph(graph_cluster.clusters[7])))



In [ ]:

    
foo[1].nodes()



In [ ]:



In [ ]: