In [36]:
from traitlets.config.manager import BaseJSONConfigManager
path = "/home/ubuntu/.jupyter/"
cm = BaseJSONConfigManager(config_dir=path)
cm.update('livereveal', {
              'theme': 'beige',
              'transition': 'cube',
              'start_slideshow_at': 'selected',
})


Out[36]:
{u'start_slideshow_at': 'selected', u'theme': 'beige', u'transition': 'cube'}

In [249]:
%matplotlib inline

import matplotlib
import numpy as np

import shapely
import shapely.geometry
from shapely.geometry import shape
from shapely.geometry import Point, LineString, Polygon, MultiPolygon 
from shapely.wkt import dumps, loads
from shapely.wkb import dumps, loads
import sys
import MySQLdb
import sqlalchemy
import os
import csv
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
import geoalchemy2
from shapely.validation import explain_validity

from matplotlib.patches import Polygon
import matplotlib.pyplot as plt
#import pysal
import pylab as pl

In [107]:
import random


#grab a random color for coloring a figure
def get_random_color():
    r = lambda: random.randint(0,255)
    return('#%02X%02X%02X' % (r(),r(),r()))

#initialize the figure and draw the shape
def plot(shapelyGeometries, color_dict={"fill":"#AADDCC", "line":"#666666","hole_fill":"#ffffff", "hole_line":"#999999" }):
    'Plot shapelyGeometries'
    figure = pl.figure(num=None, figsize=(7, 3), dpi=180)
    axes = pl.axes()
    axes.set_aspect('equal', 'datalim')
    axes.xaxis.set_visible(False)
    axes.yaxis.set_visible(False)
    
    draw(shapelyGeometries, color_dict)
            
#Check the type and break up multipolygons        
def draw(gs, color_dict):
    'Draw shapelyGeometries'
    # Handle single and lists of geometries
    try:
        gs = iter(gs)
    except TypeError:
        gs = [gs]
    #Route polygons and multipolygons to the right place
    for g in gs:
        gType = g.geom_type
        if gType.startswith('Multi') or gType == 'GeometryCollection':
            draw(g.geoms, color_dict)
        else:
            draw_(g, color_dict)

#Break the shape into its interior and exterior rings            
def draw_(g, color_dict):

    'Draw a shapelyGeometry; thanks to Sean Gilles'
    gType = g.geom_type
    if gType == 'Point':
        pl.plot(g.x, g.y, 'k,')
    elif gType == 'LineString':
        x, y = g.xy
        pl.plot(x, y, 'b-', color=color_dict["line"])
    elif gType == 'Polygon':
        #can draw parts as multiple colors
        if not color_dict:
            color_dict={"fill":get_random_color(), 
                        "line":"#666666",
                        "hole_fill":"#FFFFFF", 
                        "hole_line":"#999999" }
    
    
        x, y = g.exterior.xy
        pl.fill(x, y, color=color_dict["fill"], aa=True) 
        pl.plot(x, y, color=color_dict["line"], aa=True, lw=1.0)
        for hole in g.interiors:
            x, y = hole.xy
            pl.fill(x, y, color=color_dict["hole_fill"], aa=True) 
            pl.plot(x, y, color=color_dict["hole_line"], aa=True, lw=1.0)

Geospatial Analytics in Python

 

Alison Alvarez

CEO and Co-Founder of BlastPoint

Let's start with a puzzle:

Why care about geospatial data?

  • Existing tools are pretty terrible and expensive
  • Geospatial skills are rare and valuable
  • It's an intuitive and incredibly powerful way of understanding the world

Check out this election map, for example:

Maps are great storytelling tools

Why am I qualified to talk about this?

  • I spent years building big data tools for giant companies
  • I invented developed new ways to interact with geospatial data (we can talk about this later)
  • I founded a company that uses geospatial data to make data more accessible

Let's start with the spatial stuff and leave the 'geo' part for later.

First, we start with a point...


In [108]:
lat = float(40.4082978)
lng = float(-79.94710909999999)

my_point = Point(lng, lat)
print my_point
plot(my_point)


POINT (-79.94710909999999 40.4082978)

That doesn't look like very much...

ENHANCE!!


In [109]:
radius = my_point.buffer(.01)
plot(radius)


Let's see what that looks like in text...


In [110]:
print radius


POLYGON ((-79.93710909999999 40.4082978, -79.93715725273327 40.4073176285967, -79.93730124719596 40.40634689677984, -79.93753969664267 40.40539495322746, -79.93787030467487 40.40447096567635, -79.9382898873565 40.40358383263174, -79.93879440387697 40.4027420976698, -79.93937899546637 40.40195386715836, -79.94003803218813 40.40122673218814, -79.94076516715836 40.40056769546637, -79.94155339766979 40.39998310387698, -79.94239513263173 40.39947858735652, -79.94328226567634 40.39905900467489, -79.94420625322745 40.39872839664267, -79.94515819677983 40.39848994719597, -79.94612892859669 40.39834595273328, -79.94710909999999 40.3982978, -79.94808927140329 40.39834595273328, -79.94906000322015 40.39848994719597, -79.95001194677253 40.39872839664267, -79.95093593432364 40.39905900467489, -79.95182306736825 40.39947858735652, -79.95266480233019 40.39998310387698, -79.95345303284162 40.40056769546637, -79.95418016781186 40.40122673218814, -79.95483920453361 40.40195386715836, -79.95542379612301 40.4027420976698, -79.95592831264348 40.40358383263174, -79.95634789532511 40.40447096567635, -79.95667850335731 40.40539495322746, -79.95691695280402 40.40634689677984, -79.95706094726671 40.4073176285967, -79.9571091 40.4082978, -79.95706094726671 40.4092779714033, -79.95691695280402 40.41024870322016, -79.95667850335731 40.41120064677254, -79.95634789532511 40.41212463432365, -79.95592831264348 40.41301176736826, -79.95542379612301 40.4138535023302, -79.95483920453361 40.41464173284164, -79.95418016781186 40.41536886781186, -79.95345303284162 40.41602790453363, -79.95266480233019 40.41661249612302, -79.95182306736825 40.41711701264348, -79.95093593432364 40.41753659532511, -79.95001194677253 40.41786720335732, -79.94906000322015 40.41810565280403, -79.94808927140329 40.41824964726672, -79.94710909999999 40.4182978, -79.94612892859669 40.41824964726672, -79.94515819677983 40.41810565280403, -79.94420625322745 40.41786720335732, -79.94328226567634 40.41753659532511, -79.94239513263173 40.41711701264348, -79.94155339766979 40.41661249612302, -79.94076516715836 40.41602790453363, -79.94003803218813 40.41536886781186, -79.93937899546637 40.41464173284164, -79.93879440387697 40.4138535023302, -79.9382898873565 40.41301176736826, -79.93787030467487 40.41212463432365, -79.93753969664267 40.41120064677254, -79.93730124719596 40.41024870322016, -79.93715725273327 40.4092779714033, -79.93710909999999 40.4082978))

Now we've seen two types of spatial objects, here is the third:


In [111]:
lineString1 = LineString(((-3, -5), (3, 5)))
print lineString1
plot(lineString1)


LINESTRING (-3 -5, 3 5)

There are also "multi" versions of everything: multipoint, multiline and multipolygon

Now that we know the basics of what we have to manipulate, let's focus on shapes and look at what we can do with them.


First, let's talk about holes.


In [112]:
polygon0 = shapely.geometry.Polygon(
    ((1, 1), (5, 1), (5, 5), (1, 5), (1, 1)),      # Shell
)
print polygon0
print polygon0.area
plot(polygon0)


POLYGON ((1 1, 5 1, 5 5, 1 5, 1 1))
16.0

In [113]:
polygon1 = shapely.geometry.Polygon(
    ((1, 1), (5, 1), (5, 5), (1, 5), (1, 1)),      # Shell
    [
        ((2, 2), (2, 3), (3, 3), (3, 2), (2, 2)),  # Hole
        ((3, 3), (3, 4), (4, 4), (4, 3), (3, 3)),  # Hole
    ]
)
print polygon1.area
print polygon1
plot(polygon1)


14.0
POLYGON ((1 1, 5 1, 5 5, 1 5, 1 1), (2 2, 2 3, 3 3, 3 2, 2 2), (3 3, 3 4, 4 4, 4 3, 3 3))

You can use polygons to carve holes in other polygons.


In [114]:
pt = Point(0,0)
circle =  pt.buffer(.5)
small_circle = pt.buffer(.25)
plot([circle, small_circle, pt], color_dict=None)



In [115]:
donut = circle.difference(small_circle)
plot(donut, color_dict=None)


Next, you can combine polygons.


First, you can do a union on two shapes...


In [116]:
circle = Point(5,2.5).buffer(2)
plot([circle, polygon1], color_dict=None)



In [117]:
plot(circle.union(polygon1))


You can also use unions to create multipolygons


In [118]:
circle2 = Point(7,7).buffer(2)
plot([circle2, polygon1], color_dict=None)



In [119]:
new_multipolygon = polygon1.union(circle2)
plot(new_multipolygon)
print new_multipolygon.type
print "CIRCLE AREA: %f" % (circle2.area)
print "POLYGON AREA: %f" % (polygon1.area)
print "MULTIPOLYGON AREA: %f" % (new_multipolygon.area)


MultiPolygon
CIRCLE AREA: 12.546194
POLYGON AREA: 14.000000
MULTIPOLYGON AREA: 26.546194

You can also union a bunch of shapes at once


In [120]:
from shapely.ops import cascaded_union, unary_union
buffered_points_list = []

circle_set = range(5);
   
for x in circle_set:
    buffered_points_list.append(Point(x,x).buffer(1))


plot(buffered_points_list, color_dict=None)



In [121]:
union_shape = cascaded_union(buffered_points_list)
plot(union_shape, color_dict=None)


You can also create new shapes from intersections, which is incredibly useful for measuring the extent of overlap


In [122]:
circle_a = Point(0,0).buffer(1)
circle_b = Point(.5,0).buffer(1)
print circle_a.area
plot([circle_a, circle_b], color_dict=None)


3.13654849055

In [123]:
overlap = circle_a.intersection(circle_b)
print overlap.area
plot(overlap)


2.147888554

There are also a bunch of informational functions that will tell you about the relationship of two shapes


In [124]:
print circle_a.overlaps(circle_b)
plot([circle_a, circle_b], color_dict=None)


True

In [250]:
circle_c = Point(2,0).buffer(1)

print circle_a.overlaps(circle_c)
plot([circle_a, circle_c], color_dict=None)


False

In [126]:
square = shapely.geometry.Polygon(
    ((1, 1), (2, 1), (2, 2), (1, 2), (1, 1)),      # Shell
)
print circle_a.touches(square)
plot([circle_a, square], color_dict=None)


False

In [127]:
square = shapely.geometry.Polygon(
    ((1, -1), (2, -1), (2, 2), (1, 2), (1, -1)),      # Shell
)
print circle_a.touches(square)
plot([circle_a, square], color_dict=None)


True

In [128]:
circle_d = Point(1,0).buffer(.5)
print circle_b.within(circle_a)
plot([circle_a, circle_d], color_dict=None)


False

In [129]:
circle_e = Point(0,0).buffer(.5)
print circle_e.within(circle_a)
plot([circle_a, circle_e], color_dict=None)


True

Next, let's talk about simplification.


One of the dangerous things about using 2-D space is that there are a lot of N^2 algorithms, so you need to be clever about keeping computation time down.


One way is to simplify shapes and take down the number of points.

We are going to pause here and take in our first shape dataset, US states.


We'll load them using a handy package called Fiona that can read shapefiles. There are from the US Census bureau.


I'll save them in a dictionary with the key as the state name.


In [160]:
import fiona
def get_from_fiona(filename, id_attr):
    
    from shapely.geometry import shape

    data = {}

    c = fiona.open(filename, 'r')
    for shape_data in c:
        name =  shape_data['properties'][id_attr]
        parsed_shape = shape(shape_data['geometry'])
        
        data[name] = parsed_shape

    return data

In [168]:
#This function will help total up a count of all exterior ring points in a polygon or multipolygon
def points(s):
    if s.type == 'Polygon':
        return len(s.exterior.coords)
    if s.type == 'Point':
        return s
    else:
        x = 0
        for g in s.geoms:
            if g.type != 'Point' and g.type != 'LineString':
                x += len(g.exterior.coords)
        return x

In [170]:
state_dict = get_from_fiona("data/cb_2013_us_state_500k.shp", "NAME")

michigan = state_dict["Michigan"]
print "Points: " + str(points(michigan))
plot(michigan)


Points: 5277

Here is what some of the internal data looks like

OrderedDict([(u'STATEFP', u'04'), (u'STATENS', u'01779777'), (u'AFFGEOID', u'0400000US04'), (u'GEOID', u'04'), (u'STUSPS', u'AZ'), (u'NAME', u'Arizona'), (u'LSAD', u'00'), (u'ALAND', 294205037082), (u'AWATER', 1027846143)]) OrderedDict([(u'STATEFP', u'05'), (u'STATENS', u'00068085'), (u'AFFGEOID', u'0400000US05'), (u'GEOID', u'05'), (u'STUSPS', u'AR'), (u'NAME', u'Arkansas'), (u'LSAD', u'00'), (u'ALAND', 134772954601), (u'AWATER', 2958815561)]) OrderedDict([(u'STATEFP', u'08'), (u'STATENS', u'01779779'), (u'AFFGEOID', u'0400000US08'), (u'GEOID', u'08'), (u'STUSPS', u'CO'), (u'NAME', u'Colorado'), (u'LSAD', u'00'), (u'ALAND', 268432676592), (u'AWATER', 1170523775)]) OrderedDict([(u'STATEFP', u'09'), (u'STATENS', u'01779780'), (u'AFFGEOID', u'0400000US09'), (u'GEOID', u'09'), (u'STUSPS', u'CT'), (u'NAME', u'Connecticut'), (u'LSAD', u'00'), (u'ALAND', 12541965607), (u'AWATER', 1815409624)]) OrderedDict([(u'STATEFP', u'11'), (u'STATENS', u'01702382'), (u'AFFGEOID', u'0400000US11'), (u'GEOID', u'11'), (u'STUSPS', u'DC'), (u'NAME', u'District of Columbia'), (u'LSAD', u'00'), (u'ALAND', 158347880), (u'AWATER', 18636405)]) OrderedDict([(u'STATEFP', u'13'), (u'STATENS', u'01705317'), (u'AFFGEOID', u'0400000US13'), (u'GEOID', u'13'), (u'STUSPS', u'GA'), (u'NAME', u'Georgia'), (u'LSAD', u'00'), (u'ALAND', 148962779995), (u'AWATER', 4947803555)]) OrderedDict([(u'STATEFP', u'16'), (u'STATENS', u'01779783'), (u'AFFGEOID', u'0400000US16'), (u'GEOID', u'16'), (u'STUSPS', u'ID'), (u'NAME', u'Idaho'), (u'LSAD', u'00'), (u'ALAND', 214045724209), (u'AWATER', 2397731902)])

Make sure you get the shapefile that is clipped to the coastline...


In [171]:
state_dict = get_from_fiona("data/tl_2016_us_state.dbf", "NAME")

weird_michigan = state_dict["Michigan"]
print "Points: " + str(points(weird_michigan))
plot(weird_michigan)


Points: 10624

In [174]:
sim_michigan0 = michigan.simplify(0.0001)
print "Points: " + str(points(sim_michigan0))
plot(sim_michigan0)


Points: 3154

In [175]:
sim_michigan1 = michigan.simplify(0.001)
print "Points: " + str(points(sim_michigan1))
plot(sim_michigan1)


Points: 2697

In [176]:
sim_michigan2 = michigan.simplify(0.03)
print "Points: " + str(points(sim_michigan2))
plot(sim_michigan2)


Points: 375

In [177]:
sim_michigan3 = michigan.simplify(0.1)
print "Points: " + str(points(sim_michigan3))
plot(sim_michigan3)


Points: 245

In [178]:
sim_michigan4 = michigan.simplify(1)
print "Points: " + str(points(sim_michigan4))
plot(sim_michigan4)


Points: 190

In [179]:
sim_michigan5 = michigan.simplify(.1, preserve_topology=False)
print "Points: " + str(points(sim_michigan5))
plot(sim_michigan5)


Points: 76

In [157]:
plot(michigan.convex_hull)


You can also find centroids and use those for shortcuts.


In [186]:
centroid_point = michigan.centroid
plot([michigan, centroid_point.buffer(1), centroid_point.buffer(.75), centroid_point.buffer(.5)], color_dict=None)