In [1]:
import numpy as np
import pandas as pd

In [2]:
# Set some Pandas options
pd.set_option('html', False)
pd.set_option('max_columns', 30)
pd.set_option('max_rows', 10)

In [3]:
data = pd.read_hdf('/var/datasets/dshs/CD2007Q1/reduced_PUDF_base1q2007.h5','data')

Based on the example in http://www.christianpeccei.com/zipmap/

ZIP area data downloaded from ftp://ftp.cs.brown.edu/u/spr/zipdata

The mapping from states to numbers can be seen here: https://github.com/ssoper/zip-code-boundaries/blob/master/raw.html


In [4]:
import os.path
if not os.path.exists('zipdata/zt06_d00_ascii.zip'):
    !wget -P zipdata ftp://ftp.cs.brown.edu/u/spr/zipdata/zt06_d00_ascii.zip
    !unzip -d zipdata zipdata/zt06_d00_ascii.zip

In [5]:
if not os.path.exists('zipdata/zt48_d00_ascii.zip'):
    !wget -P zipdata ftp://ftp.cs.brown.edu/u/spr/zipdata/zt48_d00_ascii.zip 
    !unzip -d zipdata zipdata/zt48_d00_ascii.zip

In [6]:
def read_ascii_boundary(filestem):
    '''
    Reads polygon data from an ASCII boundary file.
    Returns a dictionary with polygon IDs for keys. The value for each
    key is another dictionary with three keys:
    'name' - the name of the polygon
    'polygon' - list of (longitude, latitude) pairs defining the main
    polygon boundary
    'exclusions' - list of lists of (lon, lat) pairs for any exclusions in
    the main polygon
    '''
    metadata_file = filestem + 'a.dat'
    data_file = filestem + '.dat'
    # Read metadata
    lines = [line.strip().strip('"') for line in open(metadata_file)]
    polygon_ids = lines[::6]
    polygon_names = lines[2::6]
    polygon_data = {}
    for polygon_id, polygon_name in zip(polygon_ids, polygon_names):
        # Initialize entry with name of polygon.
        # In this case the polygon_name will be the 5-digit ZIP code.
        polygon_data[polygon_id] = {'name': polygon_name}
    del polygon_data['0']
    # Read lon and lat.
    f = open(data_file)
    for line in f:
        fields = line.split()
        if len(fields) == 3:
            # Initialize new polygon
            polygon_id = fields[0]
            polygon_data[polygon_id]['polygon'] = []
            polygon_data[polygon_id]['exclusions'] = []
        elif len(fields) == 1:
            # -99999 denotes the start of a new sub-polygon
            if fields[0] == '-99999':
                polygon_data[polygon_id]['exclusions'].append([])
        else:
            # Add lon/lat pair to main polygon or exclusion
            lon = float(fields[0])
            lat = float(fields[1])
            if polygon_data[polygon_id]['exclusions']:
                polygon_data[polygon_id]['exclusions'][-1].append((lon, lat))
            else:
                polygon_data[polygon_id]['polygon'].append((lon, lat))
    return polygon_data

In [7]:
import csv
from pylab import *

In [8]:
import mpld3
if True:
    mpld3.enable_notebook()

In [9]:
reduced = data[['Pat_ZIP', 'Total_Charges']]
chargesbyzip = reduced.groupby('Pat_ZIP').mean()

countbyzip = reduced.groupby('Pat_ZIP').count()

In [10]:
#def makezipfigure(series, zipstem = 'zipdata/zt48_d00'):
series = chargesbyzip
series = countbyzip
zipstem = 'zipdata/zt48_d00'

maxvalue = series.max().values[0]
valuename = series.keys()[0]

# Read in ZIP code boundaries for Te
d = read_ascii_boundary(zipstem)

# Create figure and two axes: one to hold the map and one to hold
# the colorbar
figure(figsize=(5, 5), dpi=100)
map_axis = axes([0.0, 0.0, 0.8, 0.9])
cb_axis = axes([0.83, 0.1, 0.03, 0.8])
#map_axis = axes([0.0, 0.0, 4.0, 4.5])
#cb_axis = axes([4.15, 0.5, 0.15, 3.0])

#map_axis = axes([0.0, 0.0, 1.6, 1.8])
#cb_axis = axes([1.66, 0.2, 0.06, 1.2])

# Define colormap to color the ZIP codes.
# You can try changing this to cm.Blues or any other colormap
# to get a different effect
cmap = cm.PuRd

# Create the map axis
axes(map_axis)
gca().set_axis_off()

# Loop over the ZIP codes in the boundary file
for polygon_id in d:
    polygon_data = array(d[polygon_id]['polygon'])
    zipcode = d[polygon_id]['name']
    try:
        value = series.xs(zipcode).values[0]
        # Define the color for the ZIP code
        fc = cmap(float(value) / maxvalue)
    except:
        fc = (1.0, 1.0, 1.0, 1.0)

    edgecolor = [ square(min(fc[:3])) ]*3 + [0.5]
    # Draw the ZIP code
    patch = Polygon(array(polygon_data), facecolor=fc,
        edgecolor=edgecolor, linewidth=.1)
#    patch = Polygon(array(polygon_data), facecolor=fc,
#        edgecolor=(.5, .5, .5, 1), linewidth=.2)
    gca().add_patch(patch)

gca().autoscale()
title(valuename + " per ZIP Code in Texas")

# Draw colorbar
cb = mpl.colorbar.ColorbarBase(cb_axis, cmap=cmap,
    norm = mpl.colors.Normalize(vmin=0, vmax=maxvalue))
cb.set_label(valuename)

savefig('texas.pdf', dpi=100)

# Change all fonts to Arial
#for o in gcf().findobj(matplotlib.text.Text):
#    o.set_fontname('Arial')


---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-10-dc218b4ae7f3> in <module>()
     12 # Create figure and two axes: one to hold the map and one to hold
     13 # the colorbar
---> 14 figure(figsize=(5, 5), dpi=100)
     15 map_axis = axes([0.0, 0.0, 0.8, 0.9])
     16 cb_axis = axes([0.83, 0.1, 0.03, 0.8])

/usr/local/lib/python2.7/dist-packages/matplotlib/pyplot.pyc in figure(num, figsize, dpi, facecolor, edgecolor, frameon, FigureClass, **kwargs)
    433                                         frameon=frameon,
    434                                         FigureClass=FigureClass,
--> 435                                         **kwargs)
    436 
    437         if figLabel:

/usr/local/lib/python2.7/dist-packages/matplotlib/backends/backend_qt4agg.pyc in new_figure_manager(num, *args, **kwargs)
     45     FigureClass = kwargs.pop('FigureClass', Figure)
     46     thisFig = FigureClass(*args, **kwargs)
---> 47     return new_figure_manager_given_figure(num, thisFig)
     48 
     49 

/usr/local/lib/python2.7/dist-packages/matplotlib/backends/backend_qt4agg.pyc in new_figure_manager_given_figure(num, figure)
     52     Create a new figure manager instance for the given figure.
     53     """
---> 54     canvas = FigureCanvasQTAgg(figure)
     55     return FigureManagerQT(canvas, num)
     56 

/usr/local/lib/python2.7/dist-packages/matplotlib/backends/backend_qt4agg.pyc in __init__(self, figure)
     70         if DEBUG:
     71             print('FigureCanvasQtAgg: ', figure)
---> 72         FigureCanvasQT.__init__(self, figure)
     73         FigureCanvasAgg.__init__(self, figure)
     74         self._drawRect = None

/usr/local/lib/python2.7/dist-packages/matplotlib/backends/backend_qt4.pyc in __init__(self, figure)
     66         if DEBUG:
     67             print('FigureCanvasQt qt4: ', figure)
---> 68         _create_qApp()
     69 
     70         # Note different super-calling style to backend_qt5

/usr/local/lib/python2.7/dist-packages/matplotlib/backends/backend_qt5.pyc in _create_qApp()
    136                 display = os.environ.get('DISPLAY')
    137                 if display is None or not re.search(':\d', display):
--> 138                     raise RuntimeError('Invalid DISPLAY variable')
    139 
    140             qApp = QtWidgets.QApplication([str(" ")])

RuntimeError: Invalid DISPLAY variable

TODO: Compare map to census population.

TODO: Use hospital location and zipcode distances to calculate distance to hospital per zipcode

TODO: Consider 3 letter zipcodes for smoother results

TODO: Consider claim density: number/[region area]

There might be an algorithm to calculate the area of the convex hull


In [ ]: