In [1]:
import dask.dataframe as dd
import geopandas as gpd
import folium # leaflet.js py map
from folium import plugins
import numpy as np 
import pandas as pd
import os

from IPython import __version__ as ipython_version

In [2]:
# report the versions of the libraries this notebook was run with
print('Required Python libraries:')
library_versions = [
    ('IPython', ipython_version),
    ('Pandas', pd.__version__),
    ('GeoPandas', gpd.__version__),
    ('Folium', folium.__version__),
]
for library_name, library_version in library_versions:
    print('{}: {}'.format(library_name, library_version))


Required Python libraries:
IPython: 5.1.0
Pandas: 0.20.3
GeoPandas: 0.2.1
Folium: 0.4.0

In [3]:
%%time

# path of the snappy-compressed parquet crime data folder
parquet_data_folder = '../data/crimes-2001-to-present.snappy.parq'
print('Loading crime data from: ' + parquet_data_folder)

# read the crimes parquet data into a dask df indexed by 'Date'
# and load all of it into memory in one step
crimes = dd.read_parquet(parquet_data_folder, index='Date').persist()
print('Crime data loaded into memory.')


Loading crime data from: ../data/crimes-2001-to-present.snappy.parq
Crime data loaded into memory.
Wall time: 10.2 s

In [4]:
# geojson columns that are not needed for mapping
unused_area_columns = ['area', 'area_num_1', 'comarea', 'comarea_id',
                       'shape_area', 'shape_len', 'perimeter']

# read Chicago community areas geo data, drop the unused columns
# and rename area_numbe to the name used for the community area # column
areas = (gpd.read_file('../data/chicago-community-areas.geojson')
         .drop(unused_area_columns, axis=1)
         .rename(columns={'area_numbe': 'CommunityArea'}))

# convert community area # to int type
# for crime dataframe merge on community area # later
areas['CommunityArea'] = areas['CommunityArea'].astype(np.int64)

# preview the areas geo dataframe
print('Chicago Community Areas:')
print(areas.head())
print('...\nTotal Community Areas: {:,}\n...'.format(areas.shape[0]))
areas.info()


Chicago Community Areas:
   CommunityArea        community  \
0             35          DOUGLAS   
1             36          OAKLAND   
2             37      FULLER PARK   
3             38  GRAND BOULEVARD   
4             39          KENWOOD   

                                            geometry  
0  (POLYGON ((-87.60914087617894 41.8446925026539...  
1  (POLYGON ((-87.59215283879394 41.8169293462668...  
2  (POLYGON ((-87.62879823733725 41.8018930336891...  
3  (POLYGON ((-87.6067081256125 41.81681377057218...  
4  (POLYGON ((-87.59215283879394 41.8169293462668...  
...
Total Community Areas: 77
...
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 77 entries, 0 to 76
Data columns (total 3 columns):
CommunityArea    77 non-null int64
community        77 non-null object
geometry         77 non-null object
dtypes: int64(1), object(2)
memory usage: 1.9+ KB

In [5]:
# load Chicago community areas with sides info
# for plotting crime by Chicago 'sides'
community_areas = pd.read_csv('../data/chicago-community-areas.csv')

# count crimes per community area (triggers the dask computation)
crime_totals = crimes.groupby('CommunityArea').size().compute().rename('Total')

# add community crime stats
# NOTE(review): assigning a Series aligns on index labels, so this pairs the
# CommunityArea group keys with the csv row labels - confirm the csv rows are
# ordered so that each row label equals its CommunityArea number
community_areas['Total'] = crime_totals

# keep only the communities that have no missing values
community_crime = community_areas.dropna()

# print community crime stats
print('High Chicago Crime Communities:')
print(community_crime.sort_values(by='Total', ascending=False).head())
print('...\nTotal Communities: {:,}\n...'.format(len(community_crime)))
community_crime.info()


High Chicago Crime Communities:
    CommunityArea    CommunityName        Side   Total
25             25           Austin   West Side  370680
8               8  Near North Side     Central  192403
43             43      South Shore  South Side  189126
23             23    Humboldt Park   West Side  183219
24             24        West Town   West Side  171884
...
Total Communities: 77
...
<class 'pandas.core.frame.DataFrame'>
Int64Index: 77 entries, 1 to 77
Data columns (total 4 columns):
CommunityArea    77 non-null int64
CommunityName    77 non-null object
Side             77 non-null object
Total            77 non-null int64
dtypes: int64(2), object(2)
memory usage: 3.0+ KB
D:\tools\dev\python\Anaconda3-4.2.0\lib\site-packages\pandas\core\indexes\category.py:138: RuntimeWarning: Values and categories have different dtypes. Did you mean to use
'Categorical.from_codes(codes, categories)'?
  data = Categorical(data, categories=categories, ordered=ordered)

In [6]:
# join the community crime stats to the areas geo dataframe
# on community area # (inner join) for mapping
community_crime_geo_df = areas.merge(
    community_crime, how='inner', on='CommunityArea')
community_crime_geo_df.head()


Out[6]:
CommunityArea community geometry CommunityName Side Total
0 35 DOUGLAS (POLYGON ((-87.60914087617894 41.8446925026539... Douglas South Side 64059
1 36 OAKLAND (POLYGON ((-87.59215283879394 41.8169293462668... Oakland South Side 12661
2 37 FULLER PARK (POLYGON ((-87.62879823733725 41.8018930336891... Fuller Park South Side 19594
3 38 GRAND BOULEVARD (POLYGON ((-87.6067081256125 41.81681377057218... Grand Boulevard South Side 80586
4 39 KENWOOD (POLYGON ((-87.59215283879394 41.8169293462668... Kenwood South Side 33013

In [7]:
print('Folium v: {}'.format(folium.__version__))

# Chicago map center and map tile attributions
CHICAGO_COORDINATES = (41.85, -87.68)
map_attributions = (
    '&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> '
    'contributors, &copy; <a href="http://cartodb.com/attributions">CartoDB</a>')

# create Chicago map without a default base tile layer
community_crime_map = folium.Map(
    location=CHICAGO_COORDINATES,
    zoom_start=10,
    min_zoom=10,
    control_scale=True,
    tiles=None,
    attr=map_attributions)

# shade each Chicago community by its total crime count
community_crime_map.choropleth(
    # community shapes, and the feature property to match the data on
    geo_data='../data/chicago-community-areas.geojson',
    key_on='feature.properties.community',
    # crime totals keyed by community
    data=community_crime_geo_df,
    columns=['community', 'Total'],
    # colors and legend
    # (optional fixed scale:
    #  threshold_scale=[10000, 50000, 100000, 200000, 300000, 400000])
    fill_color='YlOrRd',
    fill_opacity=0.5,
    line_opacity=0.3,
    legend_name='Chicago Crime by Community (2001-2017)',
    # rendering options
    smooth_factor=2,
    highlight=True)

# add fullscreen toggle button to the map
fullscreen_toggle = plugins.Fullscreen(
    position='topright',
    title='full screen',
    title_cancel='exit full screen',
    force_separate_button=True)
fullscreen_toggle.add_to(community_crime_map)

# add base map tile options and the control to switch between them
for tiles_name in ('OpenStreetMap', 'stamentoner', 'Cartodb Positron'):
    folium.TileLayer(tiles_name).add_to(community_crime_map)
folium.LayerControl().add_to(community_crime_map)

# save map for demo
maps_folder = '../maps/'
# create the output folder first, otherwise writing the html file
# fails when the folder is missing (e.g. on a fresh checkout)
os.makedirs(maps_folder, exist_ok=True)
community_crime_map.save(os.path.join(maps_folder, 'chicago-community-crime-map.html'))

# show map
community_crime_map


Folium v: 0.4.0
Out[7]:

In [ ]: