In [1]:
from pathlib import Path
import json
from functools import reduce
import math
import datetime as dt
import pytz
from itertools import product
from collections import OrderedDict
import time
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely.ops as so
ROOT = Path('../')
DATA_DIR = ROOT/'data'
CRS_NZGD49 = {'init': 'epsg:27200', 'no_defs': True}
CRS_NZTM = {'init': 'epsg:2193', 'no_defs': True}
CRS_WGS84 = {'init': 'epsg:4326'}
%matplotlib inline
In [9]:
def get_secret(key, secrets_path=ROOT/'secrets.json'):
secrets_path = Path(secrets_path)
with secrets_path.open() as src:
secrets = json.load(src)
return secrets[key]
GOOGLE_MATRIX_URL = "https://maps.googleapis.com/maps/api/distancematrix/json"
GOOGLE_KEY = get_secret('GOOGLE_API_KEY_ALEX')
def get_matrix(origins, destinations, mode, departure_time=None, url=GOOGLE_MATRIX_URL, key=GOOGLE_KEY,
timezone='Pacific/Auckland'):
"""
Call Google to compute the duration-distance matrix from the list of origins to the list of destinations
by the given mode at the given departure time.
INPUT:
origins
List of WGS84 longitude-latitude pairs that will be round to 5 decimal places
destinations
List of WGS84 longitude-latitude pairs that will be round to 5 decimal places
mode
String; one of 'driving', 'walking', 'bicycling', or 'transit'
departure_time
Optional; ISO 8601 datetime string or 'now'; e.g. '2017-06-01T07:30:00'; can't be from the past
timezone
String; timezone for query, e.g. 'Pacific/Auckland'; see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
OUTPUT:
A decoded JSON string described at https://developers.google.com/maps/documentation/distance-matrix/intro#DirectionsResponseElements .
NOTES:
- If ``departure_time`` is not null and ``mode='driving'``, then each request must contain at most 100 elements,
where the number of elements equals the product of the number of origins and number of destinations.
"""
valid_modes = ['driving', 'walking', 'bicycling', 'transit']
if mode not in valid_modes:
raise ValueError('mode must be one of {!s}'.format(valid_modes))
origs = '|'.join(["{:.05f},{:.05f}".format(lat, lon) for lon, lat in origins])
dests = '|'.join(["{:.05f},{:.05f}".format(lat, lon) for lon, lat in destinations])
if departure_time not in [None, 'now']:
tz = pytz.timezone(timezone)
departure_time = dt.datetime.strptime(departure_time, '%Y-%m-%dT%H:%M:%S')
departure_time = int(tz.localize(departure_time).timestamp())
params = {
'origins': origs,
'destinations': dests,
'key': key,
'mode': mode,
'departure_time': departure_time,
}
r = requests.get(url, params=params)
# Raise an error if bad request
r.raise_for_status()
return r.json()
def matrix_to_df(matrix, orig_names=None, dest_names=None):
"""
Given a (decoded) JSON time-distance matrix of the form output by :func:``get_matrix``,
a list of origin names (defaults to [0, 1, 2, etc.]),
and a list of destination names (defaults to [0, 1, 2, etc.]), convert the matrix to a DataFrame with
the columns:
- ``'origin'``: one of ``orig_names``
- ``'destination'``: one of ``dest_names``
- ``'duration'``: time from origin to destination
- ``'distance'``: distance from origin to destination
The origin and destination names should be listed in the same order as the 'sources' and 'targets'
attributes of ``matrix``, respectively.
"""
# Initialize DataFrame
columns = ['orig', 'orig_name', 'dest', 'dest_name', 'duration', 'distance']
f = pd.DataFrame([], columns=columns)
# Append origins and destinations
origs, dests = zip(*product(matrix['origin_addresses'], matrix['destination_addresses']))
f['orig'] = origs
f['dest'] = dests
if orig_names is not None and dest_names is not None:
orig_names, dest_names = zip(*product(orig_names, dest_names))
f['orig_name'] = orig_names
f['dest_name'] = dest_names
# Append durations and distances
if 'duration_in_traffic' in matrix['rows'][0]['elements'][0]:
dur_key = 'duration_in_traffic'
else:
dur_key = 'duration'
durs = []
dists = []
for r in matrix['rows']:
for e in r['elements']:
if e['status'] == 'OK':
durs.append(e[dur_key]['value'])
dists.append(e['distance']['value'])
else:
durs.append(None)
dists.append(None)
f['duration'] = durs
f['distance'] = dists
return f
def build_matrix(rental_area_points, mode, departure_time=None, chunk_size=100,
url=GOOGLE_MATRIX_URL, key=GOOGLE_KEY):
"""
Compute the duration-distance matrix between all pairs of rental area points given,
but skip the diagonal entries, that is, the ones with origin equal to destination.
To do this, call:func:`get_matrix` repeatedly.
Group the duration-distance calls into ``chunk_size``-to-1 chunks.
INPUT:
rental_area_points
GeoDataFrame
mode
See :func:`get_matrix`
departure_time
See :func:`get_matrix`
chunk_size
Max number of origin-destination rows per matrix query
url
See :func:`get_matrix`
key
See :func:`get_matrix`
OUTPUT:
A DataFrame of the form...
NOTES:
- Sleeps for 1 second after every call to :func:`get_matrix` to stay within API usage limits
"""
f = rental_area_points.copy()
frames = []
status = 'OK'
for __, row in f.iterrows():
# Quit if bad status
if status != 'OK':
print('Quitting because of bad status:', status)
break
# Set the single destination
dests = [row['geometry'].coords[0]]
ra = row['rental_area']
dest_names = [ra]
# Create origin chunks and compute matrix for each chunk to destination
ff = f[f['rental_area'] != ra].copy()
num_chunks = math.ceil(ff.shape[0]/chunk_size)
for g in np.array_split(ff, num_chunks):
# Get origins
origs = [geo.coords[0] for geo in g['geometry']]
orig_names = g['rental_area'].values
# Get matrix
try:
j = get_matrix(origs, dests, mode=mode, departure_time=departure_time, url=url, key=key)
status = j['status']
if status != 'OK':
break
df = matrix_to_df(j, orig_names, dest_names)
except:
df = pd.DataFrame()
df['orig'] = np.nan
df['orig_name'] = orig_names
df['dest'] = np.nan
df['dest_name'] = ra
df['duration'] = np.nan
df['distance'] = np.nan
frames.append(df)
time.sleep(1)
return pd.concat(frames).sort_values(['orig', 'dest'])
In [ ]:
# Test some
origs = [
[174.66339111328125, -36.45000844447082],
[174.76158142089844, -36.86533886128865],
[174.85633850097656, -37.20517535620264],
]
dests = origs[:2]
matrix = get_matrix(origs, dests, mode='transit', departure_time='2017-06-01T08:00:00')
matrix_to_df(matrix, ['bingo', 'bongo', 'boom'], ['bingo', 'bongo'])
In [10]:
# Estimate cost of job at 0.5/1000 USD/element beyond 2500 elements
def compute_google_cost(n, with_freebies=False):
"""
If ``with_freebies``, then ignore the first 2500 elements, which are free.
"""
d = OrderedDict()
d['#rental areas'] = n
N = 4*(n**2 - n)
d['#elements needed for 4 modes'] = N
d['exceeds 100000-element daily limit?'] = N > 100000
d['duration for job in minutes'] = (N/100)/60
if with_freebies:
d['cost for job in USD'] = (N - 2500)*(0.5/1000)
else:
d['cost for job in USD'] = N*(0.5/1000)
return pd.Series(d)
compute_google_cost(f.shape[0])
Out[10]:
In [15]:
key = get_secret('GOOGLE_API_KEY_PHIL')
regions = ['auckland', 'wellington', 'canterbury']
for region in regions:
# Get poits
path = DATA_DIR/region/'rental_area_points.geojson'
f = gpd.read_file(str(path))
print('* ', region)
print(compute_google_cost(f.shape[0]))
# Build matrices
departure_time='2017-06-01T07:30:00'
for mode in ['driving', 'walking', 'bicycling', 'transit']:
%time m = build_matrix(f, mode=mode, departure_time=departure_time, key=key)
n = m.shape[0]
k = m[m['distance'].notnull()].shape[0]
print(mode, 'hit rate', k/n)
path = DATA_DIR/region/'commutes_{!s}.csv'.format(mode)
m.to_csv(str(path), index=False)
In [ ]: