In [1]:
# Import packages
import json, string

import folium
import geocoder
import pandas as pd
import requests
from branca.colormap import linear
from ediblepickle import checkpoint
from folium import Map, Marker, GeoJson, LayerControl
%matplotlib inline
In [2]:
# Read API keys from file: each secrets file holds one key, stripped of surrounding whitespace
with open("secrets/.wmata") as wmata_file:
    wmata_key = wmata_file.read().strip()

with open("secrets/.walkscore") as walkscore_file:
    walkscore_key = walkscore_file.read().strip()
Leaflet is currently one of the most popular JavaScript libraries for mapping.
Python is a great language for the web. Using the same language, you can fetch data from APIs, analyze it, and publish the results as interactive maps.
Python's continuing utility is due to its ecosystem, not its syntax or speed. For example, according to Wikipedia, TensorFlow was the fastest growing deep learning framework in fall 2016. The second fastest was a high-level Python library called Keras which can seamlessly plug into TensorFlow's architecture.
There's a similar relationship between Leaflet.js and the Python package Folium.
Folium uses the Leaflet API to allow users to write Python code to generate and manipulate interactive JavaScript maps. This also allows for drawing those maps in Jupyter notebooks.
You can drag, zoom, click, and hover. More generally you can provide input and get output, even if that output requires backend calculations. JavaScript was designed to ferry information between the frontend and the backend seamlessly.
All this makes for a good tool.
The WMATA API is free to use. Learn more here.
Interactivity:
As an alternative to calling, e.g., the OpenStreetMap API directly, geocoder provides a wrapper around a number of popular geocoding services.
In [3]:
# Free-text place name to look up
location = 'Union Station'
# Geocode the place name through geocoder's OpenStreetMap (osm) provider
loc = geocoder.osm(location)
In [4]:
# Inspect the geocoding result (shown as the cell output)
loc.json
Out[4]:
In [5]:
# Maps are hierarchical objects: markers and layers are attached as children of a map
latlng = [loc.lat, loc.lng]
bus_map = Map(location=latlng, zoom_start=15)

# Blue pin on the geocoded location; its popup shows the address returned by the geocoder
station_marker = Marker(
    location=latlng,
    popup=loc.address,
    icon=folium.Icon(color='blue'),
)
bus_map.add_child(station_marker)
# bus_map.add_child(GeoJson(loc.geojson))
bus_map
Out[5]:
In [6]:
# Saving maps
# bus_map.save('bus_map.html')
In [7]:
# Set general WMATA parameters (shared by the WMATA request helpers below)
# Requests to the WMATA host go through an adapter configured with max_retries = 2
wmata_adapter = requests.adapters.HTTPAdapter(max_retries=2)
session = requests.Session()
session.mount('https://api.wmata.com', wmata_adapter)

# The API key is sent as a request header
headers = {'api_key': wmata_key}
# Search radius handed to get_buses (its docstring: strings, radius in meters)
radius = '1000'
Caching and checkpointing are crucial for dealing with APIs sustainably and respectfully. You should never hit an endpoint twice for the same data.
Edible Pickle is a checkpointing tool that allows you to save the expensive results of a function so that it need not be run again if that result is already present.
In the following cell, setting `refresh = True` will make the function get current data instead of relying on the cache.
In [8]:
# Call API for bus locations
bus_endpoint = 'https://api.wmata.com/Bus.svc/json/jBusPositions'


@checkpoint(key=string.Template('{0}x{1}_radius{2}.buslist'), work_dir='cache/', refresh=False)
def get_buses(lat, lon, radius):
    """
    Return the 'BusPositions' list reported by WMATA around a point.

    All values passed as strings and radius in meters.

    The result is checkpointed under cache/ with a file name built from the
    three arguments; with refresh = False an existing checkpoint is reused
    instead of calling the API again.

    Raises ValueError when the HTTP status is not 200.
    """
    query = {
        # 'RouteID': 'B12',
        'Lat': lat,
        'Lon': lon,
        'Radius': radius,
    }
    response = session.get(bus_endpoint, params=query, headers=headers)
    if response.status_code != 200:
        raise ValueError("Response status not 200")
    return response.json()['BusPositions']
In [9]:
# Buses within `radius` of the geocoded location (read from the checkpoint cache when present)
bus_list = get_buses(loc.lat, loc.lng, radius)
# buses_in_the_area = len(bus_list)
In [10]:
# example response element (None when no buses were reported, so Run All keeps going)
bus_list[0] if bus_list else None
Out[10]:
In [11]:
# Add one triangular marker per reported bus; its popup names the route and trip headsign
for bus in bus_list:
    bus_position = [bus['Lat'], bus['Lon']]
    bus_label = 'Route %s to %s' % (bus['RouteID'], bus['TripHeadsign'])
    bus_marker = folium.features.RegularPolygonMarker(
        location=bus_position,
        popup=bus_label,
        number_of_sides=3,
        radius=10,
        weight=1,
        fill_opacity=0.8,
    )
    bus_marker.add_to(bus_map)
bus_map
Out[11]:
In [12]:
# Base map for the neighborhood layers: same center as bus_map, lower zoom, 'Stamen Toner' tiles
nh_map = Map(location=latlng, zoom_start=13, tiles='Stamen Toner')
GeoJSON is a file format that combines geographical data with associated JSON attributes. You can find or create these datasets in a variety of ways. This particular dataset comes from this GitHub repository.
In [13]:
# Parse the neighborhood GeoJSON file and keep a handle on its list of features
with open('geojson/neighborhood-composition.geojson') as geojson_file:
    gjdata = json.load(geojson_file)

nhoods = gjdata['features']
nhoods[0]  # preview the first feature
Out[13]:
In [14]:
# Create Pandas DataFrame: one row per feature, built from each feature's 'properties' dict.
# Columns are the first feature's property names in sorted order.
property_rows = [feature['properties'] for feature in nhoods]
property_names = sorted(nhoods[0]['properties'])
nhdata = pd.DataFrame(property_rows, columns=property_names)
In [15]:
# Preview the first rows of the neighborhood table
nhdata.head()
Out[15]:
In [16]:
# Using Pandas to create derived variables
# Density is the POP90 column divided element-wise by the AREA_ column
nhdata['Density'] = nhdata['POP90'].div(nhdata['AREA_'])
nhdata.describe()
Out[16]:
Check out ColorBrewer for advice about coloring for cartography.
In [17]:
# Set up colormaps to represent the range of values.
# `linear` is branca.colormap.linear, imported with the other packages in the first cell.
# Each scale runs from the column minimum up to a fraction of the column maximum
# (max / 1.5 and max / 2) -- presumably so a few large values do not compress
# the rest of the scale; confirm against the data before changing the divisors.
pop_values = nhdata['POP90']
popcolors = linear.GnBu_06.scale(pop_values.min(), pop_values.max() / 1.5)

pov_values = nhdata['POVRATE']
povcolors = linear.PuRd_06.scale(pov_values.min(), pov_values.max() / 2)

print(popcolors(1000))  # sample lookup: the color assigned to a value of 1000
povcolors
Out[17]:
In [18]:
# Adds a caption to the map that shows the color scale:
# label the population colormap, then attach the colormap itself to nh_map
popcolors.caption = 'Population Scale'
popcolors.add_to(nh_map)
Out[18]:
In [19]:
def _population_style(feature):
    """Style one feature: thin dashed black outline, fill color looked up from its POP90 property."""
    return {
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillColor': popcolors(feature['properties']['POP90']),
    }


# Add the neighborhoods to nh_map as a layer named 'population'
GeoJson(gjdata, name='population', style_function=_population_style).add_to(nh_map)
Out[19]:
In [20]:
def _poverty_style(feature):
    """Style one feature: thin dashed black outline, fill color looked up from its POVRATE property."""
    return {
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillColor': povcolors(feature['properties']['POVRATE']),
    }


# Add the neighborhoods to nh_map as a second layer named 'poverty rate'
GeoJson(gjdata, name='poverty rate', style_function=_poverty_style).add_to(nh_map)
Out[20]:
In [21]:
# Add a layer control to nh_map
LayerControl().add_to(nh_map)
Out[21]:
In [22]:
# Colormaps can be changed on the fly
# Display the neighborhood map
nh_map
Out[22]:
This example notebook goes through some other techniques for creating choropleths with additional functionality all within one choropleth method.
Walk Score is a measure of how dependent an address is on having a car. For example, areas that require a car are more expensive to live in than they first appear. An API is available.
Interactivity:
In [23]:
# Get Metrobus route data from WMATA
route_endpoint = 'https://api.wmata.com/Bus.svc/json/jRouteDetails'
date = '2018-09-01' # Changing this constant will likely require hitting the API


def get_route_shape(route, date):  # eg. 'L2', 'YYYY-MM-DD'
    """
    Return the list of shape points for a Metrobus route.

    Parameters
    ----------
    route : route ID sent as 'RouteID', e.g. 'L2'.
    date : 'YYYY-MM-DD' string sent as 'Date'; a falsy value omits the parameter.

    Returns
    -------
    The 'Shape' entry of Direction0 when that direction is present, otherwise
    the 'Shape' entry of Direction1. Callers in this notebook read 'Lat' and
    'Lon' from each point.

    Raises
    ------
    ValueError
        If the HTTP status is not 200, or if the response carries neither
        direction.
    """
    params = {'RouteID': route}
    if date:
        params['Date'] = date
    response = session.get(route_endpoint, params=params, headers=headers)
    if response.status_code != 200:
        raise ValueError("Error: Response status not 200")

    details = response.json()
    # A direction can come back as null; indexing Direction0 unconditionally
    # would then fail with an opaque TypeError, so take the first one present.
    for direction_key in ('Direction0', 'Direction1'):
        direction = details.get(direction_key)
        if direction is not None:
            return direction['Shape']
    raise ValueError("Error: No direction data returned for route %s" % route)

# for stop in route_shape:
# Marker(location=[stop['Lat'], stop['Lon']], popup=str(stop['SeqNum'])).add_to(m)
In [24]:
# Get Walkscore data - 500 ft grid resolution
# NOTE(review): this endpoint is plain http, so the API key travels unencrypted;
# confirm whether the service accepts https before switching.
walkscore_endpoint = 'http://api.walkscore.com/score'


def get_walkscore(pin):
    """
    Query the Walk Score API for a single point.

    Example request:
        http://api.walkscore.com/score?format=json&
        address=1119%208th%20Avenue%20Seattle%20WA%2098101&lat=47.6085&
        lon=-122.3295&transit=1&bike=1&wsapikey=<YOUR-WSAPIKEY>

    Parameters
    ----------
    pin : sequence whose first item is the latitude and second the longitude.

    Returns
    -------
    The decoded JSON response, or None when the HTTP status is not 200.
    """
    latitude, longitude = pin[0], pin[1]
    # The request carries an address alongside the coordinates, so
    # reverse-geocode the point to obtain one.
    address = geocoder.osm(pin, method='reverse').address
    params = {
        'format': 'json',
        'wsapikey': walkscore_key,
        'lat': latitude,
        'lon': longitude,
        'transit': '1',
        'bike': '1',
        'address': address,
    }
    response = requests.get(walkscore_endpoint, params=params)
    if response.status_code != 200:
        return None
    return response.json()
In [25]:
def _extract_scores(score_json):
    """Return (walk, transit, bike) from one Walk Score response, using -1 for anything missing."""
    if not score_json:
        return -1, -1, -1
    walk = score_json.get('walkscore', -1)
    # `or {}` also covers a key that is present but null, which would break a chained .get()
    transit = (score_json.get('transit') or {}).get('score', -1)
    bike = (score_json.get('bike') or {}).get('score', -1)
    return walk, transit, bike


@checkpoint(key=string.Template('{0}_scores_{1}.panda'), work_dir='cache/', refresh=False)
def get_route_scores(route, date):
    """
    Build a table of walk, bike and transit scores along a route.

    Fetches the route shape, then calls get_walkscore once per shape point.

    Returns a DataFrame with columns 'pin' ((lat, lon) tuple), 'walk_score',
    'bike_score' and 'transit_score', one row per shape point. A score that
    could not be obtained is recorded as -1.

    The result is checkpointed under cache/ keyed by route and date; with
    refresh = False a stored table, including any -1 placeholders, is reused
    until the checkpoint is refreshed.
    """
    shape = get_route_shape(route, date)
    pins = [(pin['Lat'], pin['Lon']) for pin in shape]
    scores = [_extract_scores(get_walkscore(pin)) for pin in pins]

    df = pd.DataFrame({
        'pin': pins,
        'walk_score': [walk for walk, _, _ in scores],
        'transit_score': [transit for _, transit, _ in scores],
        'bike_score': [bike for _, _, bike in scores],
    })
    return df[['pin', 'walk_score', 'bike_score', 'transit_score']]
In [26]:
# Example response from Walk Score API
test = get_walkscore(latlng)
In [27]:
# Show the example response
test
Out[27]:
In [28]:
# This is where the magic happens: get the route's shape and score every point along it
# (the result is read from the checkpoint cache when one exists for this route and date)
# Cached data for: L2, V5, E4, W4, 38B, 70
# Maps here: https://www.wmata.com/schedules/maps/
route = 'L2'
df = get_route_scores(route, date)
In [29]:
# Print the table's (rows, columns) and preview its first rows
print(df.shape)
df.head()
Out[29]:
In [30]:
# Map for the route score line, centered on the geocoded location
score_map = Map(location = loc.latlng, zoom_start = 12, tiles = 'Stamen Terrain')
In [31]:
# Draw the route through its shape points, colored by walk score with the ['y', 'orange', 'r'] ramp
route_points = df['pin']
walk_values = df['walk_score']
layer_name = u'Route %s Walk Score\u00ae' % route

color_line = folium.features.ColorLine(
    route_points,
    colors=walk_values,
    colormap=['y', 'orange', 'r'],
    weight=6,
    name=layer_name,
)
color_line.add_to(score_map)
Out[31]:
In [32]:
# This adds the population layer back in
def _score_map_population_style(feature):
    """Style one feature: thin dashed black outline, fill color looked up from its POP90 property."""
    return {
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillColor': popcolors(feature['properties']['POP90']),
    }


# This adds the population layer back in, this time on score_map
GeoJson(gjdata, name='population', style_function=_score_map_population_style).add_to(score_map)
Out[32]:
In [33]:
# Add a layer control to score_map
LayerControl().add_to(score_map)
Out[33]:
In [34]:
# Display the route score map
score_map
Out[34]:
In [35]:
# score_map.save("score_map.html")
© Ariel M'ndange-Pfupfu 2018
In [ ]: