In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
# Apply seaborn's "whitegrid" style to the figures drawn below.
sns.set_style("whitegrid")
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline
In [2]:
# Load the count table. The file has no header row, so the column names are
# supplied here, and every column is parsed as an integer.
# Presumably one row per (month, KY_CD) pair -- verify against the data source.
col_names = ['month', 'KY_CD', 'cnt']
df = pd.read_csv(
    'month_valid.csv',
    names=col_names,
    header=None,
    dtype=int,
)
In [3]:
# Find the ten offense codes (KY_CD) with the largest total count.
# The group-wise sum also adds up the 'month' column, but only 'cnt' is used
# for the ranking.
df2 = df.groupby('KY_CD').sum()
top10_rows = df2.sort_values('cnt', ascending=False).iloc[:10]
KYCD_TOP10 = top10_rows.index.values
In [4]:
# Sanity check: (number of rows, number of columns) of the raw frame.
df.shape
Out[4]:
In [5]:
# Keep only the rows whose offense code is one of the ten most frequent,
# then preview the first few of them.
in_top10 = df['KY_CD'].isin(KYCD_TOP10)
df_top10 = df.loc[in_top10]
df_top10.head()
Out[5]:
In [6]:
# Reshape the long table into a KY_CD x month grid of counts for the heatmap.
# The arguments are passed by keyword on purpose: positional
# index/columns/values were deprecated in pandas 1.x and are rejected by
# pandas 2.0+, while the keyword form works on every version.
pivot_table = df_top10.pivot(index='KY_CD', columns='month', values='cnt')
pivot_table
Out[6]:
In [7]:
# Reference table pairing offense codes (KY_CD) with descriptions (OFNS_DESC).
df_of = pd.read_csv('kycd_OfnsDesc.csv')

# Descriptions on file for the top-10 codes. The result is bound to a name
# because a bare expression in the middle of a cell is evaluated and then
# silently discarded, so it could never be inspected.
top10_desc = (
    df_of.loc[df_of.KY_CD.isin(KYCD_TOP10), ['KY_CD', 'OFNS_DESC']]
    .dropna()
    .values
)

# Hand-written display labels, keyed by offense code.
dic = {
    105: 'ROBBERY',
    106: 'FELONY ASSAULT',
    107: 'BURGLARY',
    109: 'GRAND LARCENY',
    235: 'DANGEROUS DRUGS',
    341: 'PETIT LARCENY',
    344: 'ASSAULT 3',
    351: 'CRIMINAL MISCHIEF',
    361: 'OFF. AGNST PUB ORD',
    578: 'HARRASSMENT 2',
}

# The labels are typed by hand while the top-10 codes are computed from the
# data, so check that they agree. Failing here gives a readable message
# instead of a bare KeyError when the labels are looked up.
missing_codes = sorted({int(code) for code in KYCD_TOP10} - set(dic))
if missing_codes:
    raise ValueError(
        'No label defined in `dic` for top-10 KY_CD code(s): %s' % missing_codes
    )
In [8]:
# Replace the numeric KY_CD row labels of the pivot with readable names.
# This cell overwrites pivot_table.index, so on a second run the index already
# holds the names. Existing labels are passed through unchanged to keep the
# cell re-runnable; an unknown code still raises KeyError as before.
known_labels = set(dic.values())
y_labels = [
    code if code in known_labels else dic[code]
    for code in pivot_table.index.values
]
pivot_table.index = y_labels
pivot_table
Out[8]:
In [9]:
# Annotated heatmap of the pivot: one row per crime type, one column per month.
fig = plt.figure(figsize=(10, 10))
heatmap_options = dict(
    annot=True,        # write the value inside every cell ...
    fmt='.0f',         # ... formatted without decimals
    linewidths=0,
    square=True,
    cmap="RdBu_r",
    cbar_kws={"orientation": "horizontal"},
)
sns.heatmap(pivot_table, **heatmap_options)
plt.title('Monthly Crime Incidents Heatmap\n', size=20)
plt.xlabel('Month', size=16)
plt.ylabel('Crime Type', size=16)
plt.yticks(rotation=-15)
plt.show()
In [10]:
# Total count per month. groupby sorts its keys by default, so the values
# come out in ascending month order.
ttl = df.groupby(df.month).sum().cnt.values
month = range(1, 13)
month_name = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Offense codes to plot individually, with matching display names.
pool = [341, 578, 344, 351, 109, 235]
pool_desc = ['Petit Larceny', 'Harrassment 2', 'Assault 3', 'Criminal Mischief', 'Grand Larceny', 'Dangerous Drugs']
df_top6 = df[df.KY_CD.isin(pool)]

# One array of monthly counts per code, in `pool` order. Each slice is sorted
# by month so the values line up with the month axis regardless of the row
# order in the CSV.
lst = [
    df_top6.loc[df_top6.KY_CD == code].sort_values('month').cnt.values
    for code in pool
]
In [11]:
# bar plot
plt.figure(figsize=(16, 6))
c = sns.color_palette("muted", 6)
c2 = sns.color_palette("deep", 6)
N = len(month)
ind = np.arange(N)
w = 0.15
for i in range(6):
plt.bar(ind+w*i, lst[i], w, color=c[i], label=pool_desc[i], alpha=0.3)
for i in range(6):
plt.plot(ind+w*(i+.5), lst[i], "o-", color=c2[i], label=pool_desc[i], linewidth=3)
plt.xlabel('Month')
plt.ylabel('Crime Occurrence')
plt.title('Top-6 Crime Type Over Month', fontsize=16)
plt.xticks(ind+3*w, month_name)
lgd = plt.legend(bbox_to_anchor=(1.03, 0.8), loc=2, borderaxespad=0.)
plt.savefig('kycd_month_bar.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
plt.show()
In [12]:
# Line chart of the total count per month, with every point labelled.
plt.figure(figsize=(12, 4))
plt.plot(month, ttl, "o-", color='skyblue', linewidth=3, fillstyle='full')
plt.xticks(month, month_name)
plt.xlabel('Month')
plt.ylabel('Crime Occurrence')
plt.title('Total Crime Occurrence Over Month')

# Print each monthly total slightly left of and above its marker.
for x_pos, total in zip(month, ttl):
    plt.text(x_pos - 0.1, total + 2000, str(total), fontsize=12, color='steelblue')

# Shade the area between the curve and a fixed level of 250000.
plt.fill_between(month, ttl, 250000, color='skyblue', alpha=0.3)
plt.tight_layout()
plt.savefig('ttl_month.png')
plt.show()
In [13]:
# Offense codes for the ten plotted crime types, with matching display names.
pool_10 = [341, 578, 344, 351, 109, 235, 361, 105, 107, 106]
pool_desc_10 = ['Petit Larceny', 'Harrassment 2', 'Assault 3', 'Criminal Mischief', 'Grand Larceny', 'Dangerous Drugs',
                'Offenses Against Public Order', 'Robbery', 'Burglary', 'Felony Assault']
df_tops_10 = df[df.KY_CD.isin(pool_10)]

# One array of monthly counts per code, in `pool_10` order. Each slice is
# sorted by month so the values line up with the month axis regardless of the
# row order in the CSV.
# NOTE: this rebinds the notebook-level name `lst`.
lst = [
    df_tops_10.loc[df_tops_10.KY_CD == code].sort_values('month').cnt.values
    for code in pool_10
]

# One line per crime type, each drawn in its own palette colour.
plt.figure(figsize=(12, 6))
c3 = sns.cubehelix_palette(10, dark=0.35, light=0.9, reverse=True)
for counts, name, colour in zip(lst, pool_desc_10, c3):
    plt.plot(month, counts, "o-", label=name, color=colour, linewidth=3)

plt.title('Top 10 Crime Type Over Month')
plt.xlim(1)  # start the x axis at the first month
plt.xticks(month, month_name)
plt.xlabel('Month')
plt.ylabel('Crime Occurrence')
# The legend sits outside the axes; pass it to savefig so it is not cropped.
lgd = plt.legend(bbox_to_anchor=(1.03, 0.7), loc=2, borderaxespad=0.)
plt.savefig('kycd_month_line.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
plt.show()
In [14]:
# Imported here rather than with the other imports at the top of the notebook.
from IPython.display import HTML
# Emit a <script> plus a button into the cell output. When the document is
# ready the script calls code_toggle() once, which hides every `div.input`
# element; each click on the button calls it again and flips them back.
# NOTE(review): the script relies on jQuery's `$` being available in the page
# and on inputs living in `div.input` -- confirm for the target frontend.
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[14]: