In [13]:
import dask.dataframe as dd
from fbprophet import Prophet
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import seaborn as sns
In [14]:
%matplotlib inline
# Optional alternative figure format, left disabled:
#%config InlineBackend.figure_format = 'svg'
# Chart styling: apply matplotlib's 'seaborn' style sheet first, then seaborn's
# 'whitegrid' style on top of it.
# NOTE(review): newer matplotlib releases renamed the 'seaborn' style sheet
# (to 'seaborn-v0_8') -- confirm against the pinned matplotlib version.
plt.style.use('seaborn')
sns.set_style('whitegrid')
In [15]:
%%time
# set parquet data folder path
parquet_data_folder = '../data/crimes-2001-to-present.snappy.parq'
print('Loading crime data from: {}'.format(parquet_data_folder))
# load crimes parquet data into dask df
crimes = dd.read_parquet(parquet_data_folder, index='Date')
# load all data into memory
crimes = crimes.persist()
print('Crime data loaded into memory.')
In [16]:
# Keep only the PrimaryType column, then select the rows marked as HOMICIDE.
crime_types = crimes[['PrimaryType']]
is_homicide = crime_types['PrimaryType'] == 'HOMICIDE'
homicides = crime_types[is_homicide]

# Count homicide records per month and print the computed table.
monthly_homicides = homicides.resample('M').count().compute()
print(monthly_homicides)
In [17]:
# Daily homicide counts summed over a 365-row rolling window, which gives a
# smoother line than the raw daily series.
daily_counts = homicides.resample('D').count()
crimes_rolling_sum = daily_counts.rolling(365).sum().compute()

# Draw the chart and decorate it through the Axes returned by the plot call.
ax = crimes_rolling_sum.plot(figsize=(10,6), color='#cc0000')
ax.set_title('Rolling sum of Chicago Homicides over 365 days period (2001-2017)')
ax.set_xlabel('Days')
ax.set_ylabel('Number of Homicides')
# Show y tick labels as whole numbers with thousands separators.
ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
plt.show()
In [23]:
# Build the frame handed to Prophet in the next cells: daily homicide counts
# in column 'y', plus a 'ds' column holding a copy of the index values.
daily_homicides = (
    homicides.resample('D').count().compute()
    .rename(columns={'PrimaryType': 'y'})
    .assign(ds=lambda counts: counts.index.values)
)
daily_homicides.head()
Out[23]:
In [27]:
# Number of extra periods to append after the end of the history.
forecast_horizon = 365

# Fit a Prophet model with default settings on the daily homicide counts.
homicides_model = Prophet()
homicides_model.fit(df=daily_homicides)

# Frame of dates to predict on: the history plus the forecast horizon.
future_homicides = homicides_model.make_future_dataframe(periods=forecast_horizon)
future_homicides.tail()
Out[27]:
In [28]:
# Run the fitted model over the future frame and preview the last rows.
homicides_forecast = homicides_model.predict(df=future_homicides)
homicides_forecast.tail()
Out[28]:
In [29]:
# Plot the forecast. The figure returned by plot() is bound to a name so it is
# not also the cell's output value: with %matplotlib inline, leaving it as the
# last expression typically renders the same chart twice (once as Out[...],
# once when the inline backend flushes open figures).
homicides_forecast_fig = homicides_model.plot(homicides_forecast)
Out[29]:
In [31]:
# Plot the forecast components. The figure returned by plot_components() is
# bound to a name so it is not also the cell's output value: with
# %matplotlib inline, leaving it as the last expression typically renders the
# same chart twice.
homicides_components_fig = homicides_model.plot_components(homicides_forecast)
Out[31]:
In [ ]: