In [ ]:
%load_ext autoreload

In [ ]:
%autoreload 2

In [ ]:
from __future__ import absolute_import, division, print_function
from builtins import (
    ascii, bytes, chr, dict, filter, hex, input, int, map,
    next, oct, open, pow, range, round, str, super, zip)

# Standard library imports
import os
from functools import partial
from math import pi
import json
from collections import defaultdict
import random

import numpy as np
import pandas as pd
import networkx as nx

# Imports for working with shapefiles
import pyproj
from shapely.geometry import (
    shape,
    MultiPolygon,
    mapping
)
from shapely.ops import (
    transform,
    cascaded_union
)
import fiona
from fiona.crs import from_epsg

# local imports
from src.modelling.input import shapes_to_graph
from src.modelling.clustering import SSGraphKMeans

In [ ]:
%matplotlib inline

In [ ]:
random.sample(range(4), 4)

In [ ]:
# Create pandas dataframes that have information about each blockgroup
poptot_df = pd.read_csv('data/block_groups/pop_tot/DEC_10_SF1_P1_with_ann.csv')
poptot_df = poptot_df[['GEO.id2', 'D001']]
poptot_df.columns = ['geoid', 'poptot']
poptot_df.drop(0, axis=0, inplace=True)
poptot_df.set_index('geoid', inplace=True)
poptot_df['poptot'] = poptot_df['poptot'].apply(int)

In [ ]:
wisc_census_blocks = 'data/block_groups/shapes/tl_2013_55_bg.shp'

# A convenience object for projecting lat/long values
# from EPSG 4326 to 3695 (approximate xy mappings for
# central Wisconsin)
project = partial(
    pyproj.transform,
    pyproj.Proj(init='epsg:4326'), 
    pyproj.Proj(init='epsg:3695')
)

In [ ]:
# Create a list of blockgroups, which has a shape, a geoid,
# and an untransformed shape
with fiona.open(wisc_census_blocks) as f:
    blocks = [
        {
            'shape': shape(block['geometry']), 
            'geoid': block['properties']['GEOID']
        }
        for block in f
    ]

In [ ]:
blocks_graph = shapes_to_graph(blocks)

In [ ]:
block_geoids = [
    block['geoid']
    for block in blocks
]

In [ ]:
block_populations = {
    geoid: poptot_df.to_dict()['poptot'][geoid]
    for geoid in block_geoids
}

In [ ]:
np.random.seed(3434)
sample_node = np.random.choice(block_geoids)

In [ ]:
sample_node

In [ ]:
sample_graph = nx.ego_graph(blocks_graph, sample_node, radius=4)

In [ ]:
sample_weights = {
    geoid: pop
    for geoid, pop in block_populations.items()
    if geoid in sample_graph
}
n_clusters_samples = 4
total_weight = sum(sample_weights.values())
ideal_weight = total_weight/n_clusters_samples
tols = np.linspace(5000, 200, 8)

In [ ]:
sample_graph_cluster = SSGraphKMeans(n_clusters=n_clusters_samples, tol=tols)

In [ ]:
sample_graph_cluster.fit(sample_graph, sample_weights)

In [ ]:
sample_graph_cluster.cluster_weights

In [ ]:
nx.draw(sample_graph.subgraph(sample_graph_cluster.clusters[2]))

In [ ]:
sum(sample_weights[node] for node in sample_graph_cluster.clusters[1])

In [ ]:
sum(sample_weights[node] for node in sample_graph_cluster.clusters[3])

In [ ]:
graph_cluster = SSGraphKMeans()

In [ ]:
graph_cluster.fit(blocks_graph, block_populations)

In [ ]:
for i in range(1, 9):
    print(i, nx.is_connected(blocks_graph.subgraph(graph_cluster.clusters[i])))

In [ ]:
graph_cluster.cluster_weights

In [ ]:
'550759610002' in graph_cluster.clusters[6].border

In [ ]:
graph_cluster.clusters.keys()

In [ ]:
members_border_totals = [
    (len(graph_cluster.clusters[i]), len(graph_cluster.clusters[i].border))
    for i in range(1, 9)
]

In [ ]:
members_border_totals

In [ ]:
foo = list(nx.connected_component_subgraphs(blocks_graph.subgraph(graph_cluster.clusters[7])))

In [ ]:
foo[1].nodes()

In [ ]:


In [ ]: