In [1]:
import datashader as ds
import dask.dataframe as dd
import bokeh.models
from bokeh.charts import HeatMap
from bokeh.plotting import (
figure, show, output_file, output_notebook)
from bokeh.tile_providers import STAMEN_TONER
import holoviews as hv
from holoviews.operation.datashader import datashade
from holoviews.operation.datashader import aggregate
from holoviews.operation import decimate
from holoviews import streams
from holoviews.streams import RangeXY, PlotSize
# geo libs imports
import geopandas as gpd
import geoviews as gv
import cartopy.crs as cartopy_crs
import math
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns
import os
# version imports
from IPython import __version__ as ipython_version
from pandas import __version__ as pandas_version
from bokeh import __version__ as bokeh_version
from IPython.core.display import Markdown
In [2]:
# render the project README as the notebook introduction;
# the with-block closes the file handle once the text has been read
with open('README.md') as readme_file:
    readme_text = readme_file.read()
Markdown(readme_text)
Out[2]:
In [3]:
# report the versions of the core libraries imported at the top
print('Required Python libraries:')
version_lines = (
    ('IPython - %s', ipython_version),
    ('Pandas - %s', pandas_version),
    ('Bokeh - %s', bokeh_version),
)
for line_template, library_version in version_lines:
    print(line_template % library_version)
In [4]:
# show matplotlib figures inline in the notebook
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
# select the matplotlib 'seaborn' style sheet, then set
# seaborn's 'whitegrid' style for matplotlib charts
plt.style.use('seaborn')
sns.set_style('whitegrid')
# load the bokeh plotting extension for holoviews charts
hv.extension('bokeh')
In [5]:
%%time
# set parquet data folder path
parquet_data_folder = '../data/crimes-2001-to-present.snappy.parq'
print('Loading crime data from: {}'.format(parquet_data_folder))
# load crimes parquet data into dask df
crimes = dd.read_parquet(parquet_data_folder, index='Date')
# load all data into memory
crimes = crimes.persist()
print('Crime data loaded into memory.')
In [6]:
# columns of the community areas geo data that are not used in this notebook
unused_area_columns = [
    'area', 'area_num_1', 'comarea', 'comarea_id',
    'shape_area', 'shape_len', 'perimeter']
# read the Chicago community areas geo data, drop the unused columns and
# rename area_numbe so it can be used to merge with the crime data later
areas = (
    gpd.read_file('../data/chicago-community-areas.geojson')
    .drop(unused_area_columns, axis=1)
    .rename(columns={'area_numbe': 'CommunityArea'})
)
# the merge key has to be an int type
areas['CommunityArea'] = areas['CommunityArea'].astype(np.int64)
print('Chicago Community Areas:')
print(areas.head())
print('...\nTotal Community Areas: {:,}\n...'.format(len(areas)))
areas.info()
In [7]:
# load Chicago community areas with sides info
# for plotting crime by Chicago 'sides'
community_areas = pd.read_csv('../data/chicago-community-areas.csv')
# count the crimes per community area; the resulting series is indexed
# by the CommunityArea number
crime_totals = crimes.groupby('CommunityArea').size().compute().rename('Total')
# look each row's total up through its CommunityArea value: assigning the
# series directly would align it on the csv's default 0..N-1 row index
# and attach the totals to the wrong communities
community_areas['Total'] = community_areas['CommunityArea'].map(crime_totals)
# keep only the rows without missing values
community_crime = community_areas.dropna()
# print community crime stats
print('High Chicago Crime Communities:')
print(community_crime.sort_values(by='Total', ascending=False).head())
print('...\nTotal Communities: {:,}\n...'.format(len(community_crime)))
community_crime.info()
In [8]:
# join the community area geometries with the community crime stats
# on the shared CommunityArea number, for mapping
community_crime_geo_df = areas.merge(
    community_crime, how='inner', on='CommunityArea')
community_crime_geo_df.head()
Out[8]:
In [9]:
# converts geo coordinates to mercator
def geo_to_mercator(x_lon, y_lat):
    """Convert a longitude/latitude pair given in degrees to mercator x/y.

    Returns an (x_mercator, y_mercator) tuple. When abs(x_lon) is above
    180 or abs(y_lat) is not below 90, a message is printed and None is
    returned instead.
    """
    degrees_to_radians = 0.017453292519943295
    if not (abs(x_lon) <= 180 and abs(y_lat) < 90):
        print('Invalid coordinate values for conversion')
        return None
    lon_radians = x_lon * degrees_to_radians
    lat_radians = y_lat * degrees_to_radians
    sin_lat = math.sin(lat_radians)
    x_mercator = 6378137.0 * lon_radians
    y_mercator = 3189068.5 * math.log((1.0 + sin_lat) / (1.0 - sin_lat))
    return x_mercator, y_mercator
# geo_to_mercator takes (longitude, latitude), so the longitude is passed first
print('Chicago mercator coordinates: ', geo_to_mercator(-87.67805, 41.91038))
# convert Chicago geo bounds for mapping
geo_bounds = [41.65, -87.78, 42.02, -87.53] # lat,lon start, lat,lon end
# the bounds are stored latitude first, so swap each pair for the conversion;
# the resulting coords are (x, y) mercator tuples
start_coord = geo_to_mercator(geo_bounds[1], geo_bounds[0])
end_coord = geo_to_mercator(geo_bounds[3], geo_bounds[2])
print('Chicago area mercator bounds:')
print('start:', start_coord)
print('end:', end_coord)
In [10]:
# Bokeh geo json data source built from the merged community geo dataframe
areas_ds = bokeh.models.GeoJSONDataSource(
    geojson=community_crime_geo_df.to_json())

# crime totals per community drive both the printout and the color scale
community_totals = community_crime['Total']
print('min:', community_totals.min())
print('max:', community_totals.max())

# Chicago bounds as Bokeh ranges; they are not passed to the figure below
x_start, y_start = start_coord[0], start_coord[1]
x_end, y_end = end_coord[0], end_coord[1]
x_range = bokeh.models.Range1d(start=x_start, end=x_end)
y_range = bokeh.models.Range1d(start=y_start, end=y_end)

# map Chicago community areas with Bokeh
output_notebook()
TOOLS = 'pan,wheel_zoom,reset,hover,save'
figure_options = dict(
    title='Chicago Crimes by Community (2001-2017)',
    tools=TOOLS,
    x_axis_location=None,
    y_axis_location=None,
    responsive=True)
fig = figure(**figure_options)
# hide the axes and the grid lines
fig.axis.visible = False
fig.grid.grid_line_color = None

# linear color scale spanning the lowest to the highest community total
color_mapper = bokeh.models.LinearColorMapper(
    palette=bokeh.palettes.Spectral5,
    low=community_totals.min(),
    high=community_totals.max())
community_fill = dict(field='Total', transform=color_mapper)
fig.patches(
    xs='xs', ys='ys', source=areas_ds,
    fill_color=community_fill, fill_alpha=0.5,
    line_color='black', line_width=0.5)

# hover tooltip that follows the mouse, showing community, side and total
hover = fig.select_one(bokeh.models.HoverTool)
hover.point_policy = 'follow_mouse'
hover.tooltips = u"""
<div>
<div class="bokeh_hover_tooltip">@community, @Side</div>
<div class="bokeh_hover_tooltip">Total Crimes: @Total</div>
</div>
"""

# set the html output file for the map and show it
output_file('../maps/chicago-crime-by-community.html')
show(fig)
In [11]:
# get daily homicides stats
crime_types = crimes[['PrimaryType']]
is_homicide = crime_types['PrimaryType'] == 'HOMICIDE'
homicides = crime_types[is_homicide]
# count the homicide rows per day and name the count column 'Homicides'
daily_homicides = (
    homicides.resample('D').count().compute()
    .rename(columns={'PrimaryType': 'Homicides'})
)
# expose the parts of the date index as regular columns
homicide_dates = daily_homicides.index
daily_homicides['Year'] = homicide_dates.year
daily_homicides['Month'] = homicide_dates.month
daily_homicides['Date'] = homicide_dates
print(daily_homicides.head())
print(daily_homicides.tail(10))
In [12]:
# holoviews dataset for the heatmap, with 'Homicides' declared
# as the value dimension (name, label)
homicides_vdim = ('Homicides', 'Homicides')
homicides_dataset = hv.Dataset(daily_homicides, vdims=[homicides_vdim])
homicides_dataset
Out[12]:
In [13]:
%opts HeatMap [width=600 height=480 logz=True fontsize={'xticks': '8pt'}, \
tools=['hover'] toolbar='above' colorbar=True xrotation=30] (cmap='RdBu_r')
# create homicides heatmap
homicides_heatmap = hv.HeatMap(homicides_dataset.aggregate(['Year', 'Month'], np.sum),
label='Chicago Homicides Heatmap (2001-2017)')
homicides_heatmap
Out[13]:
In [14]:
%opts Curve [width=300 height=480 yaxis='right'] (line_color='black') {+framewise}
# declare Tap stream with heatmap as source and initial values
posxy = hv.streams.Tap(source=homicides_heatmap, x=2017, y=8)
# tap callback: curve for the tapped heatmap cell
def tap_histogram(x, y):
    """Build a Curve over 'Date' for the tapped Year (x) and Month (y).

    The rows are selected from homicides_dataset and the curve label
    names the selected year and month.
    """
    tapped_rows = homicides_dataset.select(Month=y, Year=x)
    curve_label = 'Year %s, Month: %s' % (x, y)
    return hv.Curve(tapped_rows, kdims=['Date'], label=curve_label)
# see http://build.holoviews.org/reference/streams/bokeh/heatmap_tap.html
# heatmap limited to the selected years, laid out next to the curve that
# is rebuilt by tap_histogram whenever the Tap stream changes
selected_heatmap = homicides_heatmap.select(Year=(2001, 2018))
tapped_curve = hv.DynamicMap(tap_histogram, kdims=[], streams=[posxy])
selected_heatmap + tapped_curve
Out[14]:
In [15]:
# box-whisker chart of the (Year, Month) homicide totals grouped by Month
monthly_totals = homicides_dataset.aggregate(['Year', 'Month'], np.sum)
homicides_boxwhisker = hv.BoxWhisker(
    monthly_totals, kdims=['Month'], vdims=['Homicides'],
    label='Monthly Chicago Homicides (2001-2017)')
plot_options = {'show_legend': False, 'width': 400}
style = {'color': 'Month'}
homicides_boxwhisker(plot=plot_options, style=style)
Out[15]:
In [16]:
%%time
crimes = crimes.dropna()
crimes.tail()
In [17]:
# reset holoviews plot and style options for all crimes map
hv.util.opts('Image [width=800 height=400 shared_axes=False logz=True] {+axiswise} ')
hv.util.opts("HLine VLine (color='white' line_width=1) Layout [shared_axes=False] ")
hv.util.opts("Curve [xaxis=None yaxis=None show_grid=False, show_frame=False] (color='orangered') {+framewise}")
# Reproject crime points from Mercator to PlateCarree (latitude/longitude)
# see: http://holoviews.org/gallery/apps/bokeh/nytaxi_hover.html#bokeh-gallery-nytaxi-hover
# NOTE(review): the points are declared as GOOGLE_MERCATOR here; confirm the
# Longitude/Latitude columns of the crime data hold mercator values and
# not degrees, otherwise the declared crs should be PlateCarree
points = gv.Points(crimes, kdims=['Longitude', 'Latitude'], vdims=[], crs=cartopy_crs.GOOGLE_MERCATOR)
projected = gv.operation.project_points(points, projection=cartopy_crs.PlateCarree())
projected = projected.redim(Longitude='lon', Latitude='lat')
# Use datashader to rasterize and linked streams for interactivity
agg = aggregate(projected, link_inputs=True, x_sampling=0.0001, y_sampling=0.0001)
# start the crosshair on Chicago (lon -87.67805, lat 41.91038) instead of
# the New York position used by the nytaxi example
pointerx = hv.streams.PointerX(x=-87.67805, source=projected)
pointery = hv.streams.PointerY(y=41.91038, source=projected)
# vertical and horizontal crosshair lines that follow the pointer streams
vline = hv.DynamicMap(lambda x: hv.VLine(x), streams=[pointerx])
hline = hv.DynamicMap(lambda y: hv.HLine(y), streams=[pointery])
# sample of the aggregate at the pointer's lon value; only used by the
# commented-out layout below
sampled = hv.util.Dynamic(agg, operation=lambda obj, x: obj.sample(lon=x),
                          streams=[pointerx], link_inputs=False)
# only the crosshair is displayed for now
hvobj = ((hline * vline)) # << sampled.opts(plot={'Curve': dict(width=100)}))
hvobj
#hvobj = ((agg * hline * vline) << sampled.opts(plot={'Curve': dict(width=100)}))
# Obtain Bokeh document and set the title
#doc = hv.renderer('bokeh').server_doc(hvobj)
#doc.title = 'Chicago Crimes FireFly Map Crosshair'
Out[17]:
In [ ]: