Zip Coordinate Data Migration

Task 1: ZIP Code Center Coordinate Data

Data Source: US Census Bureau: 2015 U.S. Gazetteer Files - ZIP Code Tabulation Areas


In [1]:
import csv

In [2]:
# Load ZIP code center coordinates from the tab-delimited Census gazetteer file.
# Columns 0, 5 and 6 of each data row are kept as the code, latitude and longitude.
zips = []
# newline='' hands line-ending handling to the csv module, as its documentation asks.
with open('Data/2015_Gaz_zcta_national.txt', 'r', newline='') as file:
    next(file)  # skip the header line
    reader = csv.reader(file, delimiter='\t')
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line; indexing it would raise IndexError.
            continue
        # Strip padding so whitespace around a field is not copied into the output CSV.
        zips.append({
            'code': row[0].strip(),
            'lat': row[5].strip(),
            'lon': row[6].strip(),
        })

In [3]:
# Sanity check: number of gazetteer rows loaded into `zips`.
len(zips)


Out[3]:
33144

In [4]:
# Write the ZIP center table as a tab-delimited file. The City and State columns are
# written empty; only ZipCode, lat and lon are filled from `zips`.
with open('zip_center_coordinate.csv', 'w', newline='') as file:
    field_names = ['ZipCode', 'City', 'State', 'lat', 'lon']
    writer = csv.writer(file, delimiter='\t', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(field_names)
    # The loop variable is deliberately not named `zip`: at notebook top level that
    # would rebind the builtin zip() for every cell executed afterwards in this kernel.
    for zip_entry in zips:
        writer.writerow([zip_entry['code'], '', '', zip_entry['lat'], zip_entry['lon']])

Task 2: Extract ZIP code border data to CSV


In [5]:
%matplotlib inline
import shapefile
from functools import reduce

from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np

In [6]:
def find_max_and_min(shape_records):
    """Return a padded lon/lat bounding box covering all points of the shape records.

    Args:
        shape_records: iterable of objects exposing ``shape.points``, a sequence of
            (lon, lat) pairs.

    Returns:
        Tuple ``(lon_min, lon_max, lat_min, lat_max)``; every bound is widened by 0.05.

    Raises:
        ValueError: if the records contain no points at all.
    """
    # Flatten in one pass. Concatenating lists pairwise with reduce() copies the
    # accumulated list on every step, which is quadratic in the total point count.
    points = [point for record in shape_records for point in record.shape.points]
    if not points:
        raise ValueError('shape_records contains no points')
    points = np.array(points)
    # Column 0 holds longitudes, column 1 latitudes; 0.05 is a margin around the data.
    lon_min, lon_max = points[:, 0].min() - .05, points[:, 0].max() + .05
    lat_min, lat_max = points[:, 1].min() - .05, points[:, 1].max() + .05
    return lon_min, lon_max, lat_min, lat_max

def sub_polys_in_poly(poly_points):
    """Split a flat sequence of points into closed rings.

    Points are accumulated until the newest point equals the first point of the
    current ring (with at least two points collected); that ring is then emitted and
    a new one is started. Trailing points that never close a ring are not returned.

    Args:
        poly_points: iterable of points comparable with ``==``.

    Returns:
        List of rings, each a list of points whose first and last entries are equal.
    """
    rings = []
    current = []
    for pt in poly_points:
        current.append(pt)
        if len(current) < 2:
            # A lone point cannot close a ring yet.
            continue
        if current[0] == current[-1]:
            rings.append(current)
            current = []
    return rings

def plotShapes(plt, shape_records, colors):
    """Draw the outlines of the given shape records on a Mercator basemap.

    The map extent is the padded bounding box returned by ``find_max_and_min``.
    Each record's points are split into rings with ``sub_polys_in_poly`` and every
    ring is plotted as a marked line in that record's color.

    Args:
        plt: accepted for interface compatibility; not used in the body.
        shape_records: sequence of objects exposing ``shape.points``.
        colors: indexable by record position; ``colors[i]`` colors record ``i``.
    """
    lon_min, lon_max, lat_min, lat_max = find_max_and_min(shape_records)

    # Named `basemap` rather than `map` so the builtin map() is not shadowed here.
    basemap = Basemap(
        projection='merc',
        lat_0=np.average([lat_min, lat_max]),
        lon_0=np.average([lon_min, lon_max]),
        resolution='c',
        area_thresh=0.1,
        llcrnrlon=lon_min,
        llcrnrlat=lat_min,
        urcrnrlon=lon_max,
        urcrnrlat=lat_max,
    )

    # Background layers.
    basemap.drawcoastlines()
    basemap.drawstates()
    basemap.fillcontinents(color='coral')
    basemap.drawmapboundary()

    for position, record in enumerate(shape_records):
        for ring in sub_polys_in_poly(record.shape.points):
            coords = np.array(ring)
            # Calling the Basemap instance projects lon/lat into map coordinates.
            x, y = basemap(coords[:, 0], coords[:, 1])
            basemap.plot(x, y, 'o-', color=colors[position])

In [7]:
def generate_rows(shape_record):
    """Build one output row per border point of a shape record.

    The record's points are split into rings with ``sub_polys_in_poly``. Within each
    ring, the first point is tagged ``'Start'``, the last ``'End'`` and all others
    ``''``; ``index`` is the point's position inside its ring.

    Args:
        shape_record: object exposing ``record`` (first field is used as the ZIP
            code) and ``shape.points`` (sequence of (lon, lat) pairs).

    Returns:
        List of dicts with keys ``lat``, ``lon``, ``zip``, ``index`` and ``type``.
    """
    zip_code = shape_record.record[0]
    rows = []
    for ring in sub_polys_in_poly(shape_record.shape.points):
        last_position = len(ring) - 1
        for position, point in enumerate(ring):
            # `marker` instead of `type` keeps the builtin type() usable in here.
            if position == 0:
                marker = 'Start'
            elif position == last_position:
                marker = 'End'
            else:
                marker = ''
            rows.append({
                'lat': point[1],
                'lon': point[0],
                'zip': zip_code,
                'index': position,
                'type': marker,
            })
    return rows

In [8]:
# Open the ZCTA boundary shapefile; the path is given as a base name without extension.
sf = shapefile.Reader("Data/cb_2015_us_zcta510_500k/cb_2015_us_zcta510_500k")

In [9]:
# Flatten the per-shape row lists into a single list of border-point rows.
rows = [
    border_row
    for record in sf.shapeRecords()
    for border_row in generate_rows(record)
]

In [10]:
# Sanity check: total number of border-point rows generated across all shapes.
len(rows)


Out[10]:
5564686

In [11]:
# Dump every border point as one tab-delimited line, preceded by a header row.
with open('zip_border_point_coordinates.csv', 'w', newline='') as out_file:
    field_names = ['Lat', 'Lon', 'ZipCode', 'OrderNum', 'Type']
    border_writer = csv.writer(
        out_file,
        delimiter='\t',
        quotechar='|',
        quoting=csv.QUOTE_MINIMAL,
    )
    border_writer.writerow(field_names)
    # Row dict keys, listed in the same order as the header columns above.
    source_keys = ('lat', 'lon', 'zip', 'index', 'type')
    border_writer.writerows([entry[key] for key in source_keys] for entry in rows)

In [ ]: