In [1]:
import dask.dataframe as dd
import geopandas as gpd
import folium # leaflet.js py map
from folium import plugins
import numpy as np
import pandas as pd
import os
In [2]:
# report the versions of the main libraries this notebook relies on
print('Required Python libraries:')
for library_label, library in (('Pandas:', pd),
                               ('GeoPandas:', gpd),
                               ('Folium:', folium)):
    print(library_label, library.__version__)
In [3]:
%%time
# location of the 2017 crimes parquet data set
parquet_data_folder = '../data/crimes-2017.snappy.parq'
print('Loading crime data from: {}'.format(parquet_data_folder))
# read the parquet data into a dask data frame indexed by 'Date'
# and persist it so all data is loaded into memory
crimes = dd.read_parquet(parquet_data_folder, index='Date').persist()
print('Crime data loaded into memory.')
# report record count, partition count and data frame size
print('Crime data stats:',
      '---------------------------------------',
      sep='\n')
print('{:,} total records in {} partitions'.format(len(crimes), crimes.npartitions))
print('DataFrame size: {:,}'.format(crimes.size.compute()))
In [4]:
# columns kept for mapping and describing each crime record
crime_geo_columns = ['PrimaryType',
                     'Block',
                     'Description',
                     'LocationDescription',
                     'CommunityArea',
                     'Arrest',
                     'Domestic',
                     'Latitude',
                     'Longitude']
# get crime geo data for mapping homicides: only the coordinates are needed
# to place a record on the map, so drop rows that lack Latitude/Longitude.
# A plain dropna() over all selected columns would also discard records that
# are merely missing a descriptive field, undercounting the homicides below.
crime_geo = crimes[crime_geo_columns].dropna(subset=['Latitude', 'Longitude'])
# get homicides (compute() materializes the filtered dask data frame)
homicides = crime_geo[crime_geo['PrimaryType'] == 'HOMICIDE'].compute()
print('2017 Chicago homicides data preview:')
print('--------------------------------------------------------------------------')
print(homicides.head())
print('...')
# NOTE: this count covers homicide records that have map coordinates
print('Total 2017 homicides:', len(homicides))
In [34]:
# build the list of [Latitude, Longitude] pairs used as folium heatmap data
homicides_geo = homicides.loc[:, ['Latitude', 'Longitude']].values.tolist()
# preview the first five coordinate pairs and the homicides index
print(homicides_geo[:5])
print(homicides.index)
In [35]:
# preview the first three homicide records as (index, row) tuples
for homicide in homicides.iloc[:3].iterrows():
    print(homicide)
In [36]:
# Chicago center coordinates
CHICAGO_COORDINATES = (41.85, -87.68)
# leaflet.js map attributions
map_attributions = ('© <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> '
'contributors, © <a href="http://cartodb.com/attributions">CartoDB</a>')
# create Chicago homicides map
homicides_map = folium.Map(location=CHICAGO_COORDINATES,
                           attr=map_attributions,
                           tiles='Cartodb Positron',  # alternative: 'OpenStreetMap'
                           zoom_start=10, min_zoom=10,
                           control_scale=True)
# create homicides heatmap
homicides_heatmap = plugins.HeatMap(homicides_geo, radius=8, blur=5,
                                    name='2017 Chicago Homicides Heat Map')
# add homicides heatmap to leaflet map display
homicides_heatmap.add_to(homicides_map)
# create marker callback JS function: builds a red awesome-marker at
# (row[0], row[1]) with a placeholder 'Test' popup for each data row
marker_callback = """\
function (row) {
var icon, marker;
icon = L.AwesomeMarkers.icon({
icon: "map-marker", markerColor: "red"});
marker = L.marker(new L.LatLng(row[0], row[1])).bindPopup('Test');
marker.setIcon(icon);
return marker;
};
"""
# create faster marker cluster layer with awesome markers
plugins.FastMarkerCluster(data=homicides_geo, callback=marker_callback).add_to(homicides_map)
# TODO: add time lapse?
# add fullscreen toggle
plugins.Fullscreen(
    position='topright',
    title='full screen',
    title_cancel='exit full screen',
    force_separate_button=True).add_to(homicides_map)
# add layer control for heatmap/markers display toggle
folium.LayerControl().add_to(homicides_map)
# save map for demo; create the output folder first so the save does not
# fail when it is missing (e.g. on a fresh checkout)
maps_folder = '../maps/'
os.makedirs(maps_folder, exist_ok=True)
homicides_map.save(os.path.join(maps_folder, 'chicago-homicides-2017-map.html'))
# show homicides map
homicides_map
Out[36]:
In [ ]: