In [1]:
import dask.dataframe as dd
import geopandas as gpd
import folium # leaflet.js py map
from folium import plugins
import numpy as np
import pandas as pd
import os
from IPython import __version__ as ipython_version
In [2]:
# Report the versions of the libraries this notebook relies on,
# one "<label> <version>" line per library.
print('Required Python libraries:')
library_versions = (
    ('IPython:', ipython_version),
    ('Pandas:', pd.__version__),
    ('GeoPandas:', gpd.__version__),
    ('Folium:', folium.__version__),
)
for library_label, library_version in library_versions:
    print(library_label, library_version)
In [3]:
%%time
# Location of the snappy-compressed parquet crime dataset.
parquet_data_folder = '../data/crimes-2001-to-present.snappy.parq'
print('Loading crime data from: {}'.format(parquet_data_folder))

# Read the parquet data into a dask dataframe indexed by 'Date' and
# persist it so later cells work against the in-memory copy.
crimes = dd.read_parquet(parquet_data_folder, index='Date').persist()
print('Crime data loaded into memory.')
In [4]:
# Columns from the community areas GeoJSON that this notebook never uses.
unused_area_columns = ['area', 'area_num_1', 'comarea', 'comarea_id',
                       'shape_area', 'shape_len', 'perimeter']

# Build the Chicago community areas geo dataframe in one pass:
# read the GeoJSON, drop the unused columns, then expose the area number
# as an integer 'CommunityArea' column so it can be merged with the
# crime data on community area # later.
areas = (
    gpd.read_file('../data/chicago-community-areas.geojson')
    .drop(unused_area_columns, axis='columns')
    .rename(columns={'area_numbe': 'CommunityArea'})
    .assign(CommunityArea=lambda frame: frame['CommunityArea'].astype(np.int64))
)

# Summarize what was loaded.
print('Chicago Community Areas:')
print(areas.head())
print('...\nTotal Community Areas: {:,}\n...'.format(len(areas)))
areas.info()
In [5]:
# load Chicago community areas with sides info
# for plotting crime by Chicago 'sides'
community_areas = pd.read_csv('../data/chicago-community-areas.csv')

# get community crime stats: number of crime records per community area.
# The resulting series is indexed by community area number.
crime_totals = crimes.groupby('CommunityArea').size().compute().rename('Total')

# Look each row's total up through its CommunityArea value. Assigning the
# series directly would align it on the CSV's default 0..N-1 row index,
# which only matches the area numbers if row positions happen to equal them.
community_areas['Total'] = community_areas['CommunityArea'].map(crime_totals)

# keep only the communities with no missing values (including areas
# that have no crime total)
community_crime = community_areas.dropna()

# print community crime stats
print('High Chicago Crime Communities:')
print(community_crime.sort_values(by='Total', ascending=False).head())
print('...\nTotal Communities: {:,}\n...'.format(len(community_crime)))
community_crime.info()
In [6]:
# Attach the community crime stats to the community area geometries,
# keeping only the areas present in both frames, so they can be mapped.
community_crime_geo_df = areas.merge(
    community_crime,
    how='inner',
    on='CommunityArea',
)
community_crime_geo_df.head()
Out[6]:
In [7]:
# Build, save and display an interactive choropleth map of total crime
# per Chicago community area.
print('Folium v:', folium.__version__)

# create Chicago map
CHICAGO_COORDINATES = (41.85, -87.68)
map_attributions = ('© <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> '
                    'contributors, © <a href="http://cartodb.com/attributions">CartoDB</a>')
community_crime_map = folium.Map(location=CHICAGO_COORDINATES,
                                 attr=map_attributions,
                                 tiles=None,  # base tile layers are added explicitly below
                                 zoom_start=10, min_zoom=10,
                                 control_scale=True)

# map Chicago communities crime: shade each community area by its 'Total',
# matching GeoJSON features to data rows through the 'community' property
choropleth_options = dict(
    geo_data='../data/chicago-community-areas.geojson',
    data=community_crime_geo_df,
    columns=['community', 'Total'],
    key_on='feature.properties.community',
    line_opacity=0.3,
    fill_opacity=0.5,
    fill_color='YlOrRd',
    legend_name='Chicago Crime by Community (2001-2017)',
    highlight=True, smooth_factor=2)
if hasattr(folium, 'Choropleth'):
    # Map.choropleth is deprecated in favour of the Choropleth class;
    # use the class whenever the installed folium provides it
    folium.Choropleth(**choropleth_options).add_to(community_crime_map)
else:
    # older folium releases only offer the Map.choropleth method
    community_crime_map.choropleth(**choropleth_options)

# add fullscreen toggle
plugins.Fullscreen(
    position='topright',
    title='full screen',
    title_cancel='exit full screen',
    force_separate_button=True).add_to(community_crime_map)

# add base map tile options
# NOTE(review): 'stamentoner' may no longer be a built-in tile set in
# recent folium releases — confirm against the installed version
folium.TileLayer('OpenStreetMap').add_to(community_crime_map)
folium.TileLayer('stamentoner').add_to(community_crime_map)
folium.TileLayer('Cartodb Positron').add_to(community_crime_map)
folium.LayerControl().add_to(community_crime_map)

# save map for demo; create the output folder first so that save()
# does not fail when the folder is missing
maps_folder = '../maps/'
os.makedirs(maps_folder, exist_ok=True)
community_crime_map.save(os.path.join(maps_folder, 'chicago-community-crime-map.html'))

# show map
community_crime_map
Out[7]:
In [ ]: