In [1]:
import dask.dataframe as dd
import geopandas as gpd
import folium # leaflet.js py map
from folium import plugins
import numpy as np 
import pandas as pd
import os

In [2]:
# report the versions of the key libraries this notebook relies on
print('Required Python libraries:')
for label, module in (('Pandas:', pd), ('GeoPandas:', gpd), ('Folium:', folium)):
    print(label, module.__version__)


Required Python libraries:
Pandas: 0.20.3
GeoPandas: 0.2.1
Folium: 0.4.0

In [3]:
%%time
# set data file path
parquet_data_folder = '../data/crimes-2017.snappy.parq'
print('Loading crime data from: {}'.format(parquet_data_folder))

# load crimes parquet data into dask df
crimes = dd.read_parquet(parquet_data_folder, index='Date')

# load all data into memory
crimes = crimes.persist()
print('Crime data loaded into memory.')

# log records count and data frame stats
print('Crime data stats:')
print('---------------------------------------')
print('{:,} total records in {} partitions'.format(len(crimes), crimes.npartitions))
print('DataFrame size: {:,}'.format(crimes.size.compute()))


Loading crime data from: ../data/crimes-2017.snappy.parq
Crime data loaded into memory.
Crime data stats:
---------------------------------------
172,030 total records in 1 partitions
DataFrame size: 2,408,420
Wall time: 3.08 s

In [4]:
# columns kept for mapping homicides
geo_columns = ['PrimaryType', 'Block', 'Description', 'LocationDescription',
               'CommunityArea', 'Arrest', 'Domestic', 'Latitude', 'Longitude']

# select those columns and drop every record with a missing value in any of them
# (so the totals below only count records that are complete in all selected columns)
crime_geo = crimes[geo_columns].dropna()

# filter homicide records and compute the result
is_homicide = crime_geo['PrimaryType'] == 'HOMICIDE'
homicides = crime_geo[is_homicide].compute()

# preview the first records and report the homicide count
print('2017 Chicago homicides data preview:')
print('--------------------------------------------------------------------------')
print(homicides.head())
print('...')
print('Total 2017 homicides:', homicides.shape[0])


2017 Chicago homicides data preview:
--------------------------------------------------------------------------
                    PrimaryType                 Block          Description  \
Date                                                                         
2017-01-01 05:19:00    HOMICIDE      046XX N BROADWAY  FIRST DEGREE MURDER   
2017-01-01 06:18:00    HOMICIDE     046XX W MONROE ST  FIRST DEGREE MURDER   
2017-01-02 09:14:00    HOMICIDE    025XX N LOWELL AVE  FIRST DEGREE MURDER   
2017-01-03 12:20:00    HOMICIDE   034XX W FULTON BLVD  FIRST DEGREE MURDER   
2017-01-03 23:52:00    HOMICIDE  032XX W LEXINGTON ST  FIRST DEGREE MURDER   

                    LocationDescription CommunityArea  Arrest  Domestic  \
Date                                                                      
2017-01-01 05:19:00              TAVERN           3.0    True     False   
2017-01-01 06:18:00              STREET          25.0   False     False   
2017-01-02 09:14:00              STREET          20.0    True     False   
2017-01-03 12:20:00              STREET          27.0   False     False   
2017-01-03 23:52:00               ALLEY          27.0   False     False   

                      Latitude  Longitude  
Date                                       
2017-01-01 05:19:00  41.966082 -87.657908  
2017-01-01 06:18:00  41.879291 -87.741599  
2017-01-02 09:14:00  41.926841 -87.735416  
2017-01-03 12:20:00  41.886341 -87.712000  
2017-01-03 23:52:00  41.871868 -87.706610  
...
Total 2017 homicides: 441

In [34]:
# build the list of [latitude, longitude] pairs used as folium heatmap data
homicides_geo = homicides.loc[:, ['Latitude', 'Longitude']].values.tolist()
print(homicides_geo[:5])
print(homicides.index)


[[41.966081546999995, -87.657908498], [41.879290642, -87.74159851299999], [41.926840967, -87.735415625], [41.886340706999995, -87.711999596], [41.871868444, -87.706610311]]
DatetimeIndex(['2017-01-01 05:19:00', '2017-01-01 06:18:00',
               '2017-01-02 09:14:00', '2017-01-03 12:20:00',
               '2017-01-03 23:52:00', '2017-01-04 00:41:00',
               '2017-01-04 05:59:00', '2017-01-05 23:30:00',
               '2017-01-06 01:43:00', '2017-01-07 10:07:00',
               ...
               '2017-08-20 02:19:00', '2017-08-20 02:21:00',
               '2017-08-20 02:35:00', '2017-08-20 05:31:00',
               '2017-08-20 05:35:00', '2017-08-20 11:12:00',
               '2017-08-20 19:01:00', '2017-08-22 17:13:00',
               '2017-08-23 11:02:00', '2017-08-24 09:50:00'],
              dtype='datetime64[ns]', name='Date', length=441, freq=None)

In [35]:
# preview the first three homicide records as (index, row) tuples
for index_and_row in homicides.iloc[:3].iterrows():
    print(index_and_row)


(Timestamp('2017-01-01 05:19:00'), PrimaryType                       HOMICIDE
Block                     046XX N BROADWAY
Description            FIRST DEGREE MURDER
LocationDescription                 TAVERN
CommunityArea                            3
Arrest                                True
Domestic                             False
Latitude                           41.9661
Longitude                         -87.6579
Name: 2017-01-01 05:19:00, dtype: object)
(Timestamp('2017-01-01 06:18:00'), PrimaryType                       HOMICIDE
Block                    046XX W MONROE ST
Description            FIRST DEGREE MURDER
LocationDescription                 STREET
CommunityArea                           25
Arrest                               False
Domestic                             False
Latitude                           41.8793
Longitude                         -87.7416
Name: 2017-01-01 06:18:00, dtype: object)
(Timestamp('2017-01-02 09:14:00'), PrimaryType                       HOMICIDE
Block                   025XX N LOWELL AVE
Description            FIRST DEGREE MURDER
LocationDescription                 STREET
CommunityArea                           20
Arrest                                True
Domestic                             False
Latitude                           41.9268
Longitude                         -87.7354
Name: 2017-01-02 09:14:00, dtype: object)

In [36]:
# Chicago center coordinates used as the initial map location
CHICAGO_COORDINATES = (41.85, -87.68)

# leaflet.js map attributions
map_attributions = ('&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> '
        'contributors, &copy; <a href="http://cartodb.com/attributions">CartoDB</a>')

# create Chicago homicides map
homicides_map = folium.Map(location=CHICAGO_COORDINATES,
                           attr=map_attributions,
                           tiles='Cartodb Positron',
                           zoom_start=10, min_zoom=10,
                           control_scale=True)

# create homicides heatmap from the [latitude, longitude] pairs
homicides_heatmap = plugins.HeatMap(homicides_geo, radius=8, blur=5,
                                    name='2017 Chicago Homicides Heat Map')

# add homicides heatmap to leaflet map display
homicides_heatmap.add_to(homicides_map)

# JS callback that builds one marker per data row:
# row[0] / row[1] are passed to L.LatLng, the marker gets a red
# awesome-markers icon and a placeholder 'Test' popup
marker_callback = """\
function (row) {
    var icon, marker;
    icon = L.AwesomeMarkers.icon({
        icon: "map-marker", markerColor: "red"});
    marker = L.marker(new L.LatLng(row[0], row[1])).bindPopup('Test');
    marker.setIcon(icon);
    return marker;
};
"""

# create faster marker cluster layer with awesome markers
plugins.FastMarkerCluster(data=homicides_geo, callback=marker_callback).add_to(homicides_map)

# TODO: add time lapse

# add fullscreen toggle
plugins.Fullscreen(
    position='topright',
    title='full screen',
    title_cancel='exit full screen',
    force_separate_button=True).add_to(homicides_map)

# add layer control for heatmap/markers display toggle
folium.LayerControl().add_to(homicides_map)

# save map for demo; create the output directory first so the save
# does not fail when '../maps/' does not exist yet
map_file = os.path.join('../maps/', 'chicago-homicides-2017-map.html')
os.makedirs(os.path.dirname(map_file), exist_ok=True)
homicides_map.save(map_file)

# show homicides map
homicides_map


Out[36]:

In [ ]: