In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [ ]:
Crime_5_7 = pd.read_csv('Chicago_Crimes_2005_to_2007.csv.',
                        na_values = [None, 'NaN','Nothing'], header = 0) 
Crime_8_11 = pd.read_csv('Chicago_Crimes_2008_to_2011.csv.',
                        na_values = [None, 'NaN','Nothing'], header = 0) 
Crime_12_17 = pd.read_csv('Chicago_Crimes_2012_to_2017.csv.',
                        na_values = [None, 'NaN','Nothing'], header = 0)

In [3]:
Crime_Data = [Crime_5_7, Crime_8_11, Crime_12_17]
del Crime_5_7
del Crime_8_11
del Crime_12_17

In [4]:
Crime_Data = pd.concat(Crime_Data,axis = 0)

In [5]:
Crime_Data.drop_duplicates(subset=['ID', 'Case Number'], inplace=True)

In [6]:
Crime_Data.drop(['Unnamed: 0','Case Number','IUCR','FBI Code','Updated On','Location',
                 'X Coordinate','Y Coordinate','Location'], inplace = True, axis = 1)

In [7]:
Crime_Data.Date = pd.to_datetime(Crime_Data.Date, format = '%m/%d/%Y %I:%M:%S %p')
Crime_Data.index = pd.DatetimeIndex(Crime_Data.Date)

In [8]:
Crime_Data['Primary Type'] = pd.Categorical(Crime_Data['Primary Type'])
Crime_Data['Description'] = pd.Categorical(Crime_Data['Description'])
Crime_Data['Location Description'] = pd.Categorical(Crime_Data['Location Description'])

In [9]:
#make graph of crimes per year
plt.figure(figsize = (8,5))
Crime_Data.groupby([Crime_Data.index.year]).size().plot.bar()
plt.title('Crime Over the Years')
plt.xlabel('Year')
plt.ylabel('Number of Crimes Acts')
plt.show()
plt.figure(figsize = (8,5))
#make graph of crimes per month
Crime_Data.groupby([Crime_Data.index.month]).size().plot.bar()
plt.title('Crime Per Month')
plt.xlabel('Month')
plt.ylabel('Number of Crimes Acts')
plt.show()
#graph of crimes per day
Crime_Data.groupby([Crime_Data.index.day]).size().plot.bar()
plt.title('Crime Per Day Of Month')
plt.xlabel('Day')
plt.ylabel('Number of Crimes Acts')
plt.show()
#graph of crimes per hour
Crime_Data.groupby([Crime_Data.index.hour]).size().plot.bar()
plt.title('Crime Per Hour')
plt.xlabel('Hour')
plt.ylabel('Number of Crimes Acts')
plt.show()



In [10]:
Crime_Data_date = Crime_Data.pivot_table('ID', aggfunc = np.size, columns = 'Primary Type', 
                                         index = Crime_Data.index.date, fill_value = 0)
Crime_Data_date.index = pd.DatetimeIndex(Crime_Data_date.index)

In [11]:
#make graph of different types of crimes per month
Plot = Crime_Data_date.plot(figsize = (20,30), subplots = True, layout = (6,6),
                                  sharex = False, sharey = False)
plt.show()



In [12]:
#plot of the relatives amounts of each type of crime
plt.figure(figsize = (8,12))
Crime_Data.groupby(Crime_Data['Primary Type']).size().sort_values(ascending = True).plot(kind = 'barh')
plt.xlabel('Number of Crimes Committed')
plt.show()



In [13]:
#remove NAs from Longitude and Latitude data
Crime_Data = Crime_Data.dropna(axis = 0, how = 'any')

In [14]:
#extract year data for each crime, plot using matplotlib for each year the latitude and longitude variables
#make color by Description column
Groups = Crime_Data.groupby(Crime_Data['Primary Type'])
Groups = dict(list(Groups))
Keys = list(Groups.keys())

In [15]:
for key in Keys:
    plt.figure(figsize = (4,4))
    plt.scatter(Groups[key].Longitude, Groups[key].Latitude, marker = '.')
    plt.ylim(41.6,42.1)
    plt.xlim(-88,-87.4)
    plt.title(key)
    plt.ylabel('Latitude')
    plt.xlabel('Longitude')
    plt.show()



In [16]:
#Try to predict arrest based on data - NaiveBayes
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
Arrest_Data = Crime_Data.drop('Arrest', axis = 1)
Arrest_Data = Arrest_Data.drop('Date', axis = 1)
Arrest_Data = Arrest_Data.drop('Block', axis = 1)
Arrest_Target = Crime_Data['Arrest']

In [17]:
Arrest_Data['Primary Type'] = (Arrest_Data['Primary Type']).cat.codes
Arrest_Data['Location Description'] = (Arrest_Data['Location Description']).cat.codes
Arrest_Data['Description'] = (Arrest_Data['Description']).cat.codes

In [18]:
gnb = GaussianNB()
scores = cross_val_score(gnb, Arrest_Data, Arrest_Target)
scores.mean()


Out[18]:
0.68343045922607271

In [19]:
#look at patterns in crime movement across chicago
for key in Keys:
    i = 0
    Date_Groups = Groups[key].groupby(Groups[key].index.year)
    Date_Groups = dict(list(Date_Groups))
    Date_Keys = list(Date_Groups.keys())
    f, ax = plt.subplots(1,13)
    f.set_figheight(6)
    f.set_figwidth(35)
    for dkeys in Date_Keys:
        ax[i].scatter(Date_Groups[dkeys].Longitude, Date_Groups[dkeys].Latitude, marker = '.')
        ax[i].set_ylim(41.6,42.1)
        ax[i].set_xlim(-88,-87.4)
        f.subplots_adjust(hspace=2)
        s = key + "\n" + str(dkeys)
        ax[i].set_title(s)
        ax[0].set_ylabel('Latitude')
        ax[i].set_xlabel('Longitude')
        f.subplots_adjust(hspace = 2)
        i = i + 1
    plt.show()



In [20]:
Crime_Data_district = Crime_Data.pivot_table('Arrest', aggfunc = np.sum, columns = ['District'], 
                                         index = Crime_Data.index.date, fill_value = 0)
Crime_Data_ward = Crime_Data.pivot_table('Arrest', aggfunc = np.sum, columns = ['Ward'], 
                                         index = Crime_Data.index.date, fill_value = 0)
Crime_Data_ca = Crime_Data.pivot_table('Arrest', aggfunc = np.sum, columns = ['Community Area'], 
                                         index = Crime_Data.index.date, fill_value = 0)

In [21]:
import matplotlib.patches as mpatches
red = mpatches.Patch(color='red', label='Amount of Arrests')
blue = mpatches.Patch(color='blue', label='Amount of Crime')


#crime per district
plt.figure(figsize = (8,5))
plt.hold(True)
Crime_Data.groupby(Crime_Data['District']).size().plot(kind = 'bar')
Crime_Data_district.sum().plot(kind = 'bar', color = 'r')
plt.title('District Data')
plt.legend(handles=[red, blue])
plt.hold(False)
#crime per ward
plt.figure(figsize = (8,5))
plt.hold(True)
Crime_Data.groupby(Crime_Data['Ward']).size().plot(kind = 'bar')
Crime_Data_ward.sum().plot(kind = 'bar', color = 'r')
plt.title('Ward Data')
plt.legend(handles=[red, blue])
plt.hold(False)
#crime per Community Area
plt.figure(figsize = (12,5))
plt.hold(True)
Crime_Data.groupby(Crime_Data['Community Area']).size().plot(kind = 'bar')
Crime_Data_ca.sum().plot(kind = 'bar', color = 'r')
plt.title('Community Area Data')
plt.legend(handles=[red, blue])
plt.hold(False)
plt.show()


C:\Users\kodur\Anaconda3\lib\site-packages\ipykernel_launcher.py:8: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  
C:\Users\kodur\Anaconda3\lib\site-packages\matplotlib\__init__.py:917: UserWarning: axes.hold is deprecated. Please remove it from your matplotlibrc and/or style files.
  warnings.warn(self.msg_depr_set % key)
C:\Users\kodur\Anaconda3\lib\site-packages\matplotlib\rcsetup.py:152: UserWarning: axes.hold is deprecated, will be removed in 3.0
  warnings.warn("axes.hold is deprecated, will be removed in 3.0")
C:\Users\kodur\Anaconda3\lib\site-packages\ipykernel_launcher.py:13: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  del sys.path[0]
C:\Users\kodur\Anaconda3\lib\site-packages\ipykernel_launcher.py:16: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  app.launch_new_instance()
C:\Users\kodur\Anaconda3\lib\site-packages\ipykernel_launcher.py:21: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
C:\Users\kodur\Anaconda3\lib\site-packages\ipykernel_launcher.py:24: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
C:\Users\kodur\Anaconda3\lib\site-packages\ipykernel_launcher.py:29: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.

In [22]:
#Arrest rates per community area, ward, and district
A_R_D = Crime_Data_district.sum() / Crime_Data.groupby(Crime_Data['District']).size()
A_R_W = Crime_Data_ward.sum() / Crime_Data.groupby(Crime_Data['Ward']).size()
A_R_CA = Crime_Data_ca.sum() / Crime_Data.groupby(Crime_Data['Community Area']).size()

plt.figure(figsize = (8,5))
A_R_D.sort_values().plot(kind = 'barh', color = 'k')
plt.title('Arrest Rates Per District', color = 'b')
plt.figure(figsize = (8,10))
A_R_W.sort_values().plot(kind = 'barh', color = 'k')
plt.title('Arrest Rates Per Ward', color = 'b')
plt.figure(figsize=(8,12))
A_R_CA.sort_values().plot(kind = 'barh', color = 'k')
plt.title('Arrest Rates Per Community Area', color = 'b')
plt.show()



In [28]:
#Arrest rates per crime type
Crime_Data_Type = Crime_Data.pivot_table('Arrest', aggfunc = np.sum, columns = ['Primary Type'], 
                                         index = Crime_Data.index.date, fill_value = 0)
#crime per district
plt.figure(figsize = (8,8))
plt.hold(True)
Crime_Data.groupby(Crime_Data['Primary Type']).size().plot(kind = 'barh')
Crime_Data_Type.sum().plot(kind = 'barh', color = 'r')
plt.title('Crime Type Data')
plt.legend(handles=[red, blue])
plt.hold(False)

plt.figure(figsize = (10,8))
A_R_PT = Crime_Data_Type.sum() / Crime_Data.groupby(Crime_Data['Primary Type']).size()
A_R_PT.sort_values().plot(kind = 'barh', color = 'k')
plt.title('Arrest Rates Per Crime Type')
plt.show()


C:\Users\kodur\Anaconda3\lib\site-packages\ipykernel_launcher.py:6: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  
C:\Users\kodur\Anaconda3\lib\site-packages\matplotlib\__init__.py:917: UserWarning: axes.hold is deprecated. Please remove it from your matplotlibrc and/or style files.
  warnings.warn(self.msg_depr_set % key)
C:\Users\kodur\Anaconda3\lib\site-packages\matplotlib\rcsetup.py:152: UserWarning: axes.hold is deprecated, will be removed in 3.0
  warnings.warn("axes.hold is deprecated, will be removed in 3.0")
C:\Users\kodur\Anaconda3\lib\site-packages\ipykernel_launcher.py:11: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  # This is added back by InteractiveShellApp.init_path()

In [54]:
A = Crime_Data_Type.sum() / Crime_Data.groupby(Crime_Data['Primary Type']).size()

In [55]:
mean_A = Crime_Data['Arrest'].sum() / Crime_Data.shape[0]

In [56]:
stderr_A = np.std(A) / (Crime_Data_Type.shape[1]) ** (1/2.0)

In [59]:
high_A = mean_A + 1.96 * stderr_A
low_A = mean_A - 1.96 * stderr_A
print(A)
print("Average Arrest rate is 95% between (", low_A, ",", high_A, ")")


Primary Type
ARSON                                0.119337
ASSAULT                              0.238993
BATTERY                              0.230941
BURGLARY                             0.055561
CONCEALED CARRY LICENSE VIOLATION    0.857143
CRIM SEXUAL ASSAULT                  0.144636
CRIMINAL DAMAGE                      0.068888
CRIMINAL TRESPASS                    0.726034
DECEPTIVE PRACTICE                   0.167436
GAMBLING                             0.991170
HOMICIDE                             0.426238
HUMAN TRAFFICKING                    0.100000
INTERFERENCE WITH PUBLIC OFFICER     0.926527
INTIMIDATION                         0.179982
KIDNAPPING                           0.108793
LIQUOR LAW VIOLATION                 0.986473
MOTOR VEHICLE THEFT                  0.077443
NARCOTICS                            0.991412
NON - CRIMINAL                       0.157895
NON-CRIMINAL                         0.087500
NON-CRIMINAL (SUBJECT SPECIFIED)     0.250000
OBSCENITY                            0.812500
OFFENSE INVOLVING CHILDREN           0.197385
OTHER NARCOTIC VIOLATION             0.662921
OTHER OFFENSE                        0.173915
PROSTITUTION                         0.993670
PUBLIC INDECENCY                     0.990741
PUBLIC PEACE VIOLATION               0.709065
RITUALISM                            0.111111
ROBBERY                              0.099414
SEX OFFENSE                          0.308391
STALKING                             0.154143
THEFT                                0.114993
WEAPONS VIOLATION                    0.799763
dtype: float64
Average Arrest rate is 95% between ( 0.160009407597 , 0.397138527043 )

In [64]:
for i in range(len(A)):
    if (A[i] <= low_A or A[i] >= high_A):
        print(A.index[i], A[i])


ARSON 0.119337184555
BURGLARY 0.0555608650277
CONCEALED CARRY LICENSE VIOLATION 0.857142857143
CRIM SEXUAL ASSAULT 0.144635595871
CRIMINAL DAMAGE 0.0688875109322
CRIMINAL TRESPASS 0.726033565289
GAMBLING 0.991170201969
HOMICIDE 0.426237964237
HUMAN TRAFFICKING 0.1
INTERFERENCE WITH PUBLIC OFFICER 0.926527316767
KIDNAPPING 0.108792846498
LIQUOR LAW VIOLATION 0.986473312211
MOTOR VEHICLE THEFT 0.0774434623716
NARCOTICS 0.991412154345
NON - CRIMINAL 0.157894736842
NON-CRIMINAL 0.0875
OBSCENITY 0.8125
OTHER NARCOTIC VIOLATION 0.662921348315
PROSTITUTION 0.993669794823
PUBLIC INDECENCY 0.990740740741
PUBLIC PEACE VIOLATION 0.709065334711
RITUALISM 0.111111111111
ROBBERY 0.0994137077803
STALKING 0.154142581888
THEFT 0.114992938794
WEAPONS VIOLATION 0.799763006176

In [ ]:


In [ ]: