In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
sns.set_style("whitegrid")
%matplotlib inline

In [2]:
# Load the per-month incident counts. header=None tells pandas the file has
# no header row, so the column names are supplied here; every column is
# parsed as an integer.
col_names = 'month KY_CD cnt'.split()
df = pd.read_csv(
    'month_valid.csv',
    names=col_names,
    header=None,
    dtype=int,
)

In [3]:
# Sum the rows per offense code (KY_CD), rank the codes by their total count
# in descending order, and keep the codes of the first ten.
df2 = df.groupby('KY_CD').sum()
KYCD_TOP10 = df2.sort_values(by='cnt', ascending=False).index[:10].values

In [4]:
# Sanity check: (number of rows, number of columns) of the loaded table.
df.shape


Out[4]:
(813, 3)

In [5]:
# Keep only the rows whose offense code is one of the ten codes in KYCD_TOP10,
# then preview the first few rows.
df_top10 = df.loc[df['KY_CD'].isin(KYCD_TOP10)]
df_top10.head()


Out[5]:
month KY_CD cnt
0 1 341 48750
1 1 578 29314
2 1 351 27874
3 1 344 27302
4 1 109 25541

In [6]:
# Reshape the long (month, KY_CD, cnt) rows into a wide table with one row per
# offense code and one column per month.
# The arguments are passed by keyword: DataFrame.pivot's parameters are
# keyword-only since pandas 2.0, where the positional form
# pivot('KY_CD', 'month', 'cnt') raises TypeError. The keyword form works on
# older pandas releases as well.
pivot_table = df_top10.pivot(index='KY_CD', columns='month', values='cnt')
pivot_table


Out[6]:
month 1 2 3 4 5 6 7 8 9 10 11 12
KY_CD
105 11969 8875 10113 9843 11501 11372 12555 12623 12616 13617 12460 12953
106 9433 8286 10105 10349 12069 12189 12665 12281 11359 11008 10242 10037
107 14249 11810 12619 12426 14331 14098 15476 16155 15204 16079 15086 15656
109 25541 21743 25024 25280 27777 28579 29541 30271 29186 30458 28665 29704
235 16096 15434 16878 16512 16663 15429 15993 17277 16719 17035 14270 12610
341 48750 41109 48892 49553 55323 55477 59201 59394 55884 56719 52099 51607
344 27302 25288 30254 29716 33733 32696 33594 32809 30796 29873 28089 27349
351 27874 23275 28296 28600 29550 30002 30688 30410 27926 29647 26405 25955
361 16571 15019 16866 16406 16379 15876 16117 16444 15989 15720 14895 14463
578 29314 26295 30949 31218 34622 34431 35295 34045 34416 33911 30650 29718

In [7]:
# Load the offense-code description table.
df_of = pd.read_csv('kycd_OfnsDesc.csv')
# Code/description pairs for the top-10 codes. The result is not assigned and
# is not the cell's last expression, so it is neither stored nor displayed.
df_of.loc[df_of['KY_CD'].isin(KYCD_TOP10), ['KY_CD', 'OFNS_DESC']].dropna().values

# Literal mapping from offense code (KY_CD) to its description.
dic = dict([
    (105, 'ROBBERY'),
    (106, 'FELONY ASSAULT'),
    (107, 'BURGLARY'),
    (109, 'GRAND LARCENY'),
    (235, 'DANGEROUS DRUGS'),
    (341, 'PETIT LARCENY'),
    (344, 'ASSAULT 3'),
    (351, 'CRIMINAL MISCHIEF'),
    (361, 'OFF. AGNST PUB ORD'),
    (578, 'HARRASSMENT 2'),
])

In [8]:
# Replace the numeric offense codes on the row index with their descriptions.
# dic.get(code, code) returns the label unchanged when it is not a key of
# `dic`. That makes this cell safe to re-run: the previous dic[i] lookup
# raised KeyError on a second execution, because by then the index already
# held the description strings rather than the integer codes.
# Side effect of the same fallback: an offense code that has no entry in
# `dic` is kept as its raw code instead of raising.
y_labels = [dic.get(code, code) for code in pivot_table.index.values]

pivot_table.index = y_labels
pivot_table


Out[8]:
month 1 2 3 4 5 6 7 8 9 10 11 12
ROBBERY 11969 8875 10113 9843 11501 11372 12555 12623 12616 13617 12460 12953
FELONY ASSAULT 9433 8286 10105 10349 12069 12189 12665 12281 11359 11008 10242 10037
BURGLARY 14249 11810 12619 12426 14331 14098 15476 16155 15204 16079 15086 15656
GRAND LARCENY 25541 21743 25024 25280 27777 28579 29541 30271 29186 30458 28665 29704
DANGEROUS DRUGS 16096 15434 16878 16512 16663 15429 15993 17277 16719 17035 14270 12610
PETIT LARCENY 48750 41109 48892 49553 55323 55477 59201 59394 55884 56719 52099 51607
ASSAULT 3 27302 25288 30254 29716 33733 32696 33594 32809 30796 29873 28089 27349
CRIMINAL MISCHIEF 27874 23275 28296 28600 29550 30002 30688 30410 27926 29647 26405 25955
OFF. AGNST PUB ORD 16571 15019 16866 16406 16379 15876 16117 16444 15989 15720 14895 14463
HARRASSMENT 2 29314 26295 30949 31218 34622 34431 35295 34045 34416 33911 30650 29718

In [9]:
fig = plt.figure(figsize=(10, 10))

# Heatmap of the offense-by-month pivot table. Each cell is annotated with its
# value formatted without decimals, cells are drawn square with no separating
# lines, and the colour bar is laid out horizontally.
heatmap_kwargs = dict(
    annot=True,
    fmt='.0f',
    linewidths=0,
    square=True,
    cmap="RdBu_r",
    cbar_kws={"orientation": "horizontal"},
)
sns.heatmap(pivot_table, **heatmap_kwargs)

# Title (the trailing newline is part of the title string) and axis labels.
plt.title('Monthly Crime Incidents Heatmap\n', size=20)
plt.xlabel('Month', size=16)
plt.ylabel('Crime Type', size=16)
# Rotate the row labels by -15 degrees.
plt.yticks(rotation=-15)

plt.show()




In [10]:
# Total incident count per month over all offense codes. groupby sorts its
# keys by default, so the totals come out in ascending month order. Only the
# `cnt` column is summed (the sum of KY_CD codes was computed and discarded).
ttl = df.groupby('month')['cnt'].sum().values
month = range(1, 13)
month_name = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# Offense codes selected for the per-type plots, and their display names in
# the same order.
pool = [341, 578, 344, 351, 109, 235]
pool_desc = ['Petit Larceny', 'Harrassment 2', 'Assault 3', 'Criminal Mischief', 'Grand Larceny', 'Dangerous Drugs']
df_top6 = df[df.KY_CD.isin(pool)]

# One array of monthly counts per offense code, in `pool` order. Each series
# is sorted by month explicitly so that its k-th value corresponds to the k-th
# month even when the CSV rows are not stored in month order (previously the
# raw file order was used as-is).
lst = [
    df_top6[df_top6.KY_CD == code].sort_values('month').cnt.values
    for code in pool
]

In [11]:
# Grouped bar chart: for every month, one translucent bar per series in `lst`,
# overlaid with a line-and-marker trace of the same series.
plt.figure(figsize=(16, 6))
c = sns.color_palette("muted", 6)
c2 = sns.color_palette("deep", 6)

N = len(month)
ind = np.arange(N)
w = 0.15

# align='edge' anchors the LEFT edge of bar i at ind + w*i. The marker offset
# w*(i + .5) and the tick offset 3*w used below are computed for that layout.
# With matplotlib >= 2.0's default align='center', every marker landed on the
# right edge of its bar and the month ticks sat half a bar right of the group
# centre.
# zip stops at the shortest input, so only as many series are drawn as there
# are names in pool_desc.
for i, (series, name) in enumerate(zip(lst, pool_desc)):
    plt.bar(ind + w * i, series, w, align='edge', color=c[i], label=name, alpha=0.3)
for i, (series, name) in enumerate(zip(lst, pool_desc)):
    plt.plot(ind + w * (i + .5), series, "o-", color=c2[i], label=name, linewidth=3)

plt.xlabel('Month')
plt.ylabel('Crime Occurrence')
plt.title('Top-6 Crime Type Over Month', fontsize=16)
# Six bars of width w span 6*w, so 3*w is the middle of each month's group.
plt.xticks(ind + 3 * w, month_name)
# The legend is placed outside the axes; it is passed to savefig as an extra
# artist with a tight bounding box so it is not cropped from the saved image.
lgd = plt.legend(bbox_to_anchor=(1.03, 0.8), loc=2, borderaxespad=0.)
plt.savefig('kycd_month_bar.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
plt.show()



In [12]:
# Line chart of the total incident count per month, saved to ttl_month.png.
plt.figure(figsize=(12, 4))
plt.plot(month, ttl, "o-", linewidth=3, fillstyle='full', color='skyblue')
plt.xticks(month, month_name)
plt.title('Total Crime Occurrence Over Month')
plt.xlabel('Month')
plt.ylabel('Crime Occurrence')

# Print each month's total next to its marker, shifted slightly left of the
# point and 2000 above it.
for i, total in enumerate(ttl):
    plt.text(month[i] - 0.1, total + 2000, str(total), fontsize=12, color='steelblue')

# Shade the region between the curve and the constant level 250000.
plt.fill_between(month, ttl, 250000, alpha=0.3, color='skyblue')
plt.tight_layout()
plt.savefig('ttl_month.png')
plt.show()



In [13]:
# Offense codes drawn in the line chart, and their display names in the same
# order.
pool_10 = [341, 578, 344, 351, 109, 235, 361, 105, 107, 106]
pool_desc_10 = ['Petit Larceny', 'Harrassment 2', 'Assault 3', 'Criminal Mischief', 'Grand Larceny', 'Dangerous Drugs',
               'Offenses Against Public Order', 'Robbery', 'Burglary', 'Felony Assault']
df_tops_10 = df[df.KY_CD.isin(pool_10)]

# One array of monthly counts per offense code, in `pool_10` order. Each
# series is sorted by month explicitly so it lines up with the `month` x-axis
# even when the CSV rows are not stored in month order (previously the raw
# file order was used as-is).
# NOTE: this rebinds `lst`, which an earlier cell also assigns.
lst = [
    df_tops_10[df_tops_10.KY_CD == code].sort_values('month').cnt.values
    for code in pool_10
]

plt.figure(figsize=(12, 6))
c3 = sns.cubehelix_palette(10, dark=0.35, light=0.9, reverse=True)
# Pair every series with its name and colour directly instead of indexing
# three parallel lists with a hard-coded range(10).
for series, name, colour in zip(lst, pool_desc_10, c3):
    plt.plot(month, series, "o-", label=name, color=colour, linewidth=3)

plt.title('Top 10 Crime Type Over Month')
# Start the x-axis at month 1; the upper limit is left unchanged.
plt.xlim(1)
plt.xticks(month, month_name)
plt.xlabel('Month')
plt.ylabel('Crime Occurrence')
# The legend is placed outside the axes; it is passed to savefig as an extra
# artist with a tight bounding box so it is not cropped from the saved image.
lgd = plt.legend(bbox_to_anchor=(1.03, 0.7), loc=2, borderaxespad=0.)
plt.savefig('kycd_month_line.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
plt.show()



In [14]:
# Emit an HTML snippet containing a submit button that toggles the visibility
# of the elements matched by `div.input`.
# The script relies on jQuery (`$`), so it only has an effect where `$` is
# defined on the page and the code cells carry that class.
# NOTE(review): confirm both hold for the notebook front end in use.
# code_toggle is also registered as the document-ready handler; code_show
# starts as true, so that first call hides the matched elements and each
# later button press flips the state.
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')


Out[14]: