In [60]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.formula.api import logit
import pylab as pl
import seaborn as sns
mpl.style.use('fivethirtyeight')
%matplotlib inline
In [61]:
# Load the cleaned incident sample. low_memory=False has pandas parse the file
# in one pass (avoids mixed-type inference across chunks); dropna() then discards
# every row that has a missing value in any column.
incident_csv = 'Sample_Crime_Incident_Reports_Cleaned01.csv'
df = pd.read_csv(incident_csv, low_memory=False).dropna()
In [62]:
# Preview the first five rows of the loaded frame.
df.head()
Out[62]:
In [63]:
# Summary statistics for the frame's columns (pandas' default describe() output).
df.describe()
Out[63]:
In [64]:
# Encode the 'Shooting' flag ('No'/'Yes') as a 0/1 integer column, 'Shoot_Status'.
# Any value outside the mapping becomes NaN and makes astype(int) raise.
shooting_codes = {'No': 0, 'Yes': 1}
df['Shoot_Status'] = df['Shooting'].map(shooting_codes).astype(int)
In [65]:
# Preview the first five rows again, now including the derived column.
df.head()
Out[65]:
In [66]:
# One indicator column per MAIN_CRIMECODE level, named 'class_<level>'.
# drop_first=True omits the first level so it acts as the reference category.
main_crimecode_dummies = pd.get_dummies(
    df['MAIN_CRIMECODE'],
    prefix='class',
    drop_first=True,
)
# Show the generated column names.
main_crimecode_dummies.columns.tolist()
Out[66]:
In [67]:
# One indicator column per REPTDISTRICT level, named 'class_<level>'.
# drop_first=True omits the first level so it acts as the reference category.
reporting_district_dummies = pd.get_dummies(
    df['REPTDISTRICT'],
    prefix='class',
    drop_first=True,
)
# Show the generated column names.
reporting_district_dummies.columns.tolist()
Out[67]:
In [68]:
# Attach both sets of indicator columns to the incident frame (joined on the index).
# NOTE(review): both dummy sets use the same 'class_' prefix, so a crime code and a
# district sharing a label would yield clashing column names -- confirm none overlap.
dummy_frames = [main_crimecode_dummies, reporting_district_dummies]
data = df.join(dummy_frames)
data.head()
Out[68]:
In [69]:
# Regress the 0/1 shooting indicator on the reporting-district indicator columns.
# NOTE(review): the response is binary, so this OLS fit is a linear probability
# model; `logit` is imported above but unused -- confirm OLS is intended.
district_terms = [
    'class_Dorchester',
    'class_Downtown',
    'class_Downtown5',
    'class_EastBoston',
    'class_HydePark',
    'class_JamaicaPlain',
    'class_Mattapan',
    'class_Roxbury',
    'class_SouthBoston',
    'class_SouthEnd',
    'class_WestRoxbury',
]
district_formula = 'Shoot_Status~' + '+'.join(district_terms)
model = ols(district_formula, data=data)
result = model.fit()
In [70]:
# Display the fit summary for the most recently fitted model.
result.summary()
Out[70]:
In [71]:
# Keep the fitted model's residuals in `residuals` and plot their distribution.
# NOTE(review): sns.distplot is deprecated in seaborn >= 0.11 -- revisit when upgrading.
residuals=result.resid
sns.distplot(residuals)
Out[71]:
In [72]:
# Residual diagnostics side by side: distribution (left) and Q-Q plot (right).
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
hist_ax, qq_ax = axes
sns.distplot(residuals, ax=hist_ax)
# fit=True standardizes the residuals before plotting; the trailing ';' keeps the
# returned figure from being rendered a second time as the cell output.
sm.qqplot(residuals, fit=True, line='s', ax=qq_ax);
In [73]:
# Refit using only three of the district indicator columns and show the summary.
reduced_terms = ['class_Dorchester', 'class_Mattapan', 'class_Roxbury']
reduced_formula = 'Shoot_Status~' + '+'.join(reduced_terms)
model = ols(reduced_formula, data=data)
result = model.fit()
result.summary()
Out[73]:
In [74]:
# Keep the fitted model's residuals in `residuals` and plot their distribution.
residuals=result.resid
sns.distplot(residuals)
Out[74]:
In [75]:
# Regress the 0/1 shooting indicator on the crime-code indicator columns.
# The formula is written as adjacent string literals, which Python concatenates
# into one string; the spacing inside each piece is deliberate.
crimecode_formula = (
    'Shoot_Status~'
    'class_01xx +class_03xx +class_04xx +class_05CB +class_05RB +class_06MV +'
    'class_06xx +class_07RV+class_07xx +class_08xx +class_09xx +class_10xx +'
    'class_11xx +class_12xx +class_13xx + class_14xx +class_15xx +class_16xx +'
    'class_18xx +class_20xx +class_21xx +class_22xx +class_24xx +class_32GUN +'
    'class_Argue +class_Arrest +class_BENoProp +class_Ballist +class_Bomb + '
    'class_BurgTools +class_Explos +class_FIRE +class_Gather +class_Harass +'
    'class_Harbor +class_Hazardous +class_InvPer +class_InvProp +class_InvVeh +'
    'class_LICViol +class_Labor +class_Landlord +class_MVAcc+class_Manslaug +'
    'class_MedAssist +class_OTHER +class_PRISON +class_PersLoc +class_PersMiss +'
    'class_PhoneCalls +class_Plates +class_PropDam +class_PropFound +class_PropLost +'
    'class_PubDrink +class_Restrain +class_Runaway +class_SearchWarr +class_Service +'
    'class_SexReg +class_SkipFare +class_TOWED +class_TRESPASS +class_VAL'
)
model = ols(crimecode_formula, data=data)
result = model.fit()
result.summary()
Out[75]:
In [76]:
# Keep the fitted model's residuals in `residuals` and plot their distribution.
residuals=result.resid
sns.distplot(residuals)
Out[76]:
In [77]:
# Residual diagnostics side by side: distribution (left) and Q-Q plot (right).
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
hist_ax, qq_ax = axes
sns.distplot(residuals, ax=hist_ax)
# fit=True standardizes the residuals before plotting; the trailing ';' keeps the
# returned figure from being rendered a second time as the cell output.
sm.qqplot(residuals, fit=True, line='s', ax=qq_ax);
In [ ]: