In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
# Apply seaborn's "whitegrid" style to figures created from here on.
sns.set_style("whitegrid")
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline
In [2]:
# Load the header-less CSV of counts; the three columns are named here and
# every column is parsed as an integer.
col_names = ['hour', 'KY_CD', 'cnt']
df = pd.read_csv(
    'hour_valid.csv',
    names=col_names,
    header=None,
    dtype=int,
)
In [3]:
# x axis shared by every plot below: the 24 hours of the day.
hour = range(24)

# Offence codes (KY_CD) to break out individually, with their legend labels
# in matching order.
pool = [341, 578, 344, 351, 109, 235]
pool_desc = ['Petit Larceny', 'Harrassment 2', 'Assault 3', 'Criminal Mischief', 'Grand Larceny', 'Dangerous Drugs']

# Total count per hour over all offence codes. Only `cnt` is summed, and the
# result is reindexed onto `hour` so that ttl[h] is always the total for hour
# h (0 if an hour has no rows), regardless of row order in the CSV.
ttl = df.groupby('hour')['cnt'].sum().reindex(hour, fill_value=0).values

df_top6 = df[df.KY_CD.isin(pool)]

# One length-24 array per code in `pool`, aligned to `hour`. Grouping by hour
# and reindexing (instead of taking the rows in file order) keeps each series
# aligned with the x axis even when the CSV is unsorted or a code has no row
# for some hour.
lst = [
    df_top6.loc[df_top6.KY_CD == code]
    .groupby('hour')['cnt']
    .sum()
    .reindex(hour, fill_value=0)
    .values
    for code in pool
]
In [11]:
# Grouped bar plot: hourly counts for the six selected crime types, with a
# line traced through the centre of each bar series.
plt.figure(figsize=(15, 6))
c = sns.color_palette("muted", 6)   # faded fill colours for the bars
c2 = sns.color_palette("deep", 6)   # stronger colours for the lines/markers
N = len(hour)
ind = np.arange(N)                  # left edge of each hour's group of bars
w = 0.15                            # width of one bar; six bars fill 0.9 of each hour slot

for i in range(6):
    # align='edge' anchors the bar's left edge at its x value, so the marker
    # drawn at ind + w*(i + .5) below sits on the bar's centre. Matplotlib's
    # default alignment is 'center' (since 2.0), which would leave the markers
    # on the bars' right edges.
    # The bars carry no label: the lines below provide the single legend
    # entry per crime type (labelling both produced duplicate entries).
    plt.bar(ind + w*i, lst[i], w, color=c[i], alpha=0.3, align='edge')
for i in range(6):
    plt.plot(ind + w*(i + .5), lst[i], "o-", color=c2[i], label=pool_desc[i], linewidth=2)

plt.xlabel('Hour')
plt.ylabel('Crime Occurrence')
plt.title('Top-6 Crime Type Over Hours', fontsize=16)
plt.xticks(ind, [str(x) for x in hour])
plt.xlim(right=24)  # `right` is the canonical name for the upper x limit
# The legend is placed outside the axes; pass it to savefig so the tight
# bounding box includes it.
lgd = plt.legend(bbox_to_anchor=(1.03, 0.8), loc=2, borderaxespad=0.)
plt.savefig('kycd_hour_bar.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
plt.show()
In [10]:
# Line + filled-area plot of the total count per hour, with each point
# annotated in thousands.
plt.figure(figsize=(12, 4))
plt.plot(hour, ttl, "o-", linewidth=3, fillstyle='full', color='skyblue')
# Tick positions come from `hour` rather than from `ind`, which is only
# defined in the bar-plot cell; the values are the same (0..23) but this cell
# no longer depends on that cell having been run.
plt.xticks(hour, [str(x) for x in hour])
plt.title('Total Crime Occurrence Over Hours')
plt.xlabel('Hour')
plt.ylabel('Crime Occurrence')

label_dy = 10000  # vertical offset (in data units) that lifts each label above its marker
for h, total in zip(hour, ttl):
    plt.text(h, total + label_dy, str(total // 1000) + 'k', fontsize=11, color='steelblue')

plt.fill_between(hour, ttl, 0, alpha=0.3, color='skyblue')
plt.tight_layout()
plt.savefig('ttl_hour.png')
plt.show()
In [15]:
# Line plot: hourly counts for ten selected crime types.
# Offence codes (KY_CD) and their legend labels, in matching order.
pool_10 = [341, 578, 344, 351, 109, 235, 361, 105, 107, 106]
pool_desc_10 = ['Petit Larceny', 'Harrassment 2', 'Assault 3', 'Criminal Mischief', 'Grand Larceny', 'Dangerous Drugs',
                'Offenses Against Public Order', 'Robbery', 'Burglary', 'Felony Assault']
df_tops_10 = df[df.KY_CD.isin(pool_10)]

# One length-24 array per code, aligned to `hour`. Grouping by hour and
# reindexing keeps each series aligned with the x axis even if the CSV rows
# are unsorted or a code has no row for some hour.
# NOTE: this rebinds `lst`, which the top-6 cell also assigns.
lst = [
    df_tops_10.loc[df_tops_10.KY_CD == code]
    .groupby('hour')['cnt']
    .sum()
    .reindex(hour, fill_value=0)
    .values
    for code in pool_10
]

plt.figure(figsize=(12, 6))
c3 = sns.cubehelix_palette(10, dark=0.35, light=0.9, reverse=True)
for counts, desc, colour in zip(lst, pool_desc_10, c3):
    plt.plot(hour, counts, "o-", label=desc, color=colour, linewidth=1.5)

# The x axis is hour of day, so the title and x label say so. The previous
# left x-limit of 1 is dropped because it cut hour 0 out of the figure.
plt.title('Top 10 Crime Type Over Hours')
plt.xticks(hour, [str(x) for x in hour])
plt.xlabel('Hour')
plt.ylabel('Crime Occurrence')
# The legend is placed outside the axes; pass it to savefig so the tight
# bounding box includes it.
lgd = plt.legend(bbox_to_anchor=(1.03, 0.7), loc=2, borderaxespad=0.)
plt.savefig('kycd_hour_line.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
plt.show()