In [104]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import datetime
from scipy.stats import pearsonr
from sklearn.kernel_ridge import KernelRidge
import seaborn as sns
# Notebook-wide display configuration.
# Use seaborn's "whitegrid" style for every figure drawn below.
sns.set(style="whitegrid")
# Print NumPy arrays with 4 digits of precision.
np.set_printoptions(precision=4)
%matplotlib inline

In [105]:
def get_boro(x):
    """Return the text that follows the last ' - ' separator in ``x``.

    If ``x`` contains no ' - ' separator, the whole string is returned
    unchanged. Presumably used on labels shaped like '<prefix> - <borough>';
    confirm against the homeless-population CSV.
    """
    # Unpack so only the final piece of the split is kept.
    *_, tail = x.split(' - ')
    return tail

def normalize(vals):
    """Standardise ``vals`` to z-scores: zero mean, unit standard deviation.

    Uses the population standard deviation (``np.std`` default, ddof=0).

    Parameters
    ----------
    vals : array-like of numbers

    Returns
    -------
    The centred values divided by their standard deviation. When every value
    is identical (standard deviation of exactly 0) the centred values, which
    are all zero, are returned instead of dividing by zero.
    """
    centered = vals - np.mean(vals)
    spread = np.std(vals)
    # A zero spread would make the division 0/0, producing NaN for every
    # element and a RuntimeWarning; the centred values are already all zero.
    if spread == 0:
        return centered
    return centered / spread

In [106]:
# Load the tab-separated yearly crime counts. The file's own header row is
# replaced (header=0) by the explicit column names below, and rows with any
# missing value are dropped.
# NOTE(review): 'kycd' is presumably an offense-type code -- confirm against the data source.
df = pd.read_csv('yearly_boro_kycd.csv', header=0, sep='\t',
                 names=['year', 'boro', 'kycd', 'cnt']).dropna()
# Keep 2009-2012 (both ends inclusive). The explicit .copy() makes the filtered
# frame independent, so the column assignments below do not trigger pandas'
# SettingWithCopyWarning.
df = df[df.year.between(2009, 2012)].copy()
# str.capitalize() upper-cases the first character and lower-cases the rest
# (e.g. 'STATEN ISLAND' -> 'Staten island').
df['boro'] = df.boro.str.capitalize()
# Z-score the counts over the filtered 2009-2012 rows.
df['cnt'] = normalize(df.cnt.values)

In [107]:
# Load the homeless-population table and give its columns short fixed names
# (the assignment requires the file to have exactly three columns).
hmless = pd.read_csv('Directory_Of_Homeless_Population_By_Year.csv')
hmless.columns = ['Year', 'Area', 'Count']
# Replace the raw counts with z-scores computed over every row of the file.
# NOTE(review): this runs before any Area filtering, so rows that are later
# discarded still contribute to the mean/std -- confirm that is intended.
hmless = hmless.assign(Count=normalize(hmless['Count'].values))

In [108]:
# Reduce every Area label to the stripped text after its last ' - ', keep only
# the five borough rows, and order them by year, then borough.
# Written as a single non-inplace chain: sorting a boolean-filtered frame with
# inplace=True is flagged by pandas with SettingWithCopyWarning.
hmless = (
    hmless
    .assign(Area=hmless.Area.map(lambda area: get_boro(area).strip()))
    .loc[lambda frame: frame.Area.isin(
        ['Manhattan', 'Bronx', 'Brooklyn', 'Queens', 'Staten Island'])]
    .sort_values(by=['Year', 'Area'])
)

In [110]:
# Boxplot: distribution of the normalised crime counts per year, one box per
# borough within each year.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(
    x="year", y="cnt", hue='boro', data=df,
    palette=sns.color_palette("muted", 5),
    saturation=0.75, whis=1.5, linewidth=0.8,
    ax=ax,
)
# Anchor the legend to the right of the axes area.
ax.legend(bbox_to_anchor=(1.2, 0.8))
ax.set_ylabel('Crime Incidents', fontsize=16)
ax.set_xlabel('Year', fontsize=16)
ax.set_title('Crime Types in NYC Boroughs Over Time', fontsize=20)
# The y values are z-scores, so the tick marks are hidden.
ax.set_yticks([])
plt.show()



In [103]:
# Stripplot: normalised crime counts (circles) overlaid with normalised
# homeless counts (diamonds), both drawn on the same axes.
# The horizontal jitter is random; seeding NumPy's global RNG makes the figure
# identical on every run.
np.random.seed(0)
fig, ax = plt.subplots(figsize=(12, 5))
sns.stripplot(x="year", y="cnt", hue="boro", data=df, jitter=1, marker="o", size=9, alpha=0.7,
              palette=sns.color_palette("pastel", 5), ax=ax)
# NOTE(review): the second layer is positioned by its own 'Year' categories;
# this assumes hmless covers the same years as df -- confirm.
sns.stripplot(x="Year", y="Count", hue="Area", data=hmless, jitter=1, marker="D", size=11, alpha=0.9,
              palette=sns.color_palette("bright", 5), ax=ax)
# Anchor the legend to the right of the axes area.
ax.legend(bbox_to_anchor=(1.2, 0.8))
ax.set_ylabel('Counts', fontsize=16)
ax.set_xlabel('Year', fontsize=16)
ax.set_title('Crime Types in NYC Boroughs Over Time', fontsize=20)
# The y values are z-scores, so the tick marks are hidden.
ax.set_yticks([])
# Marker key, placed at fixed data coordinates (x=0.4, y=3.6).
ax.text(0.4, 3.6, "O: Crime Incidents by Type      ◆: Number of Homeless People", fontsize=12)
plt.show()



In [ ]: