Atlanta Police Department

The Atlanta Police Department provides Part 1 crime data at http://www.atlantapd.org/i-want-to/crime-data-downloads

A recent copy of the data file is stored in the cluster. Please, do not copy this data file into your home directory!

Introduction

  • This notebooks leads into an exploration of public crime data provided by the Atlanta Police Department.
  • The original data set and supplemental information can be found at http://www.atlantapd.org/i-want-to/crime-data-downloads
  • The data set is available on ARC, please, don't download into your home directory on ARC!

In [1]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

In [3]:
ls -l /home/data/APD/COBRA-YTD*.csv.gz


-rw-r--r-- 1 pmolnar data   763171 Sep 26 13:13 /home/data/APD/COBRA-YTD2017.csv.gz
-rw-r--r-- 1 pmolnar data 11253630 Oct  4 18:41 /home/data/APD/COBRA-YTD-multiyear.csv.gz

In [4]:
df = pd.read_csv('/home/data/APD/COBRA-YTD-multiyear.csv.gz')
df.shape


/usr/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (13) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Out[4]:
(285733, 23)

In [6]:
df.dtypes


Out[6]:
MI_PRINX               int64
offense_id             int64
rpt_date              object
occur_date            object
occur_time            object
poss_date             object
poss_time             object
beat                 float64
apt_office_prefix     object
apt_office_num        object
location              object
MinOfucr               int64
MinOfibr_code         object
dispo_code            object
MaxOfnum_victims     float64
Shift                 object
Avg Day               object
loc_type             float64
UC2 Literal           object
neighborhood          object
npu                   object
x                    float64
y                    float64
dtype: object

In [ ]:


In [ ]:


In [7]:
dataDict = pd.DataFrame({'DataType': df.dtypes.values, 'Description': '', }, index=df.columns.values)

We need to enter the descriptions for each entry in our dictionary manually. However, why not just create a the Python code automatically...

Run the code below only if you haven't edited the datadict.py file in a different way, since it will overwrite what you have so far. (That's why the code is commented-out.)


In [8]:
dataDict


Out[8]:
DataType Description
MI_PRINX int64
offense_id int64
rpt_date object
occur_date object
occur_time object
poss_date object
poss_time object
beat int64
apt_office_prefix object
apt_office_num object
location object
MinOfucr int64
MinOfibr_code object
dispo_code object
MaxOfnum_victims float64
Shift object
Avg Day object
loc_type float64
UC2 Literal object
neighborhood object
npu object
x float64
y float64

In [10]:
with open("datadict2.py", "w") as io:
     for i in dataDict.index:
         io.write("dataDict.loc['%s'].Description = '' # type: %s\n" % (i, str(dataDict.loc[i].DataType)))

In [11]:
ls -l datadict2.py


-rw-r--r-- 1 pmolnar pmolnar 1329 Sep 27 19:33 datadict2.py

In [14]:
# %load datadict2.py
dataDict.loc['MI_PRINX'].Description = '' # type: int64
dataDict.loc['offense_id'].Description = '' # type: int64
dataDict.loc['rpt_date'].Description = '' # type: object
dataDict.loc['occur_date'].Description = '' # type: object
dataDict.loc['occur_time'].Description = '' # type: object
dataDict.loc['poss_date'].Description = '' # type: object
dataDict.loc['poss_time'].Description = '' # type: object
dataDict.loc['beat'].Description = '' # type: int64
dataDict.loc['apt_office_prefix'].Description = '' # type: object
dataDict.loc['apt_office_num'].Description = '' # type: object
dataDict.loc['location'].Description = '' # type: object
dataDict.loc['MinOfucr'].Description = '' # type: int64
dataDict.loc['MinOfibr_code'].Description = '' # type: object
dataDict.loc['dispo_code'].Description = '' # type: object
dataDict.loc['MaxOfnum_victims'].Description = '' # type: float64
dataDict.loc['Shift'].Description = '' # type: object
dataDict.loc['Avg Day'].Description = '' # type: object
dataDict.loc['loc_type'].Description = '' # type: float64
dataDict.loc['UC2 Literal'].Description = '' # type: object
dataDict.loc['neighborhood'].Description = '' # type: object
dataDict.loc['npu'].Description = '' # type: object
dataDict.loc['x'].Description = '' # type: float64
dataDict.loc['y'].Description = '' # type: float64

In [ ]:
# %load datadict.py
dataDict.loc['MI_PRINX'].Description = '' # type: int64
dataDict.loc['offense_id'].Description = 'Unique ID in the format YYDDDNNNN with the year YY, the day of the year DDD and a counter NNNN' # type: int64
dataDict.loc['rpt_date'].Description = 'Date the crime was reported' # type: object
dataDict.loc['occur_date'].Description = 'Estimated date when the crime occured' # type: object
dataDict.loc['occur_time'].Description = 'Estimated time when the crime occured' # type: object
dataDict.loc['poss_date'].Description = '' # type: object
dataDict.loc['poss_time'].Description = '' # type: object
dataDict.loc['beat'].Description = '' # type: int64
dataDict.loc['apt_office_prefix'].Description = '' # type: object
dataDict.loc['apt_office_num'].Description = '' # type: object
dataDict.loc['location'].Description = '' # type: object
dataDict.loc['MinOfucr'].Description = '' # type: int64
dataDict.loc['MinOfibr_code'].Description = '' # type: object
dataDict.loc['dispo_code'].Description = '' # type: object
dataDict.loc['MaxOfnum_victims'].Description = '' # type: float64
dataDict.loc['Shift'].Description = 'Zones have 8 or 10 hour shifts' # type: object
dataDict.loc['Avg Day'].Description = '' # type: object
dataDict.loc['loc_type'].Description = '' # type: float64
dataDict.loc['UC2 Literal'].Description = '' # type: object
dataDict.loc['neighborhood'].Description = '' # type: object
dataDict.loc['npu'].Description = '' # type: object
dataDict.loc['x'].Description = '' # type: float64
dataDict.loc['y'].Description = '' # type: float64
dataDict.to_csv("COBRA_Data_Dictionary.csv")

In [27]:
sorted(df.npu.unique())


Out[27]:
[nan,
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'V',
 'W',
 'X',
 'Y',
 'Z']

In [31]:
len(df.neighborhood.unique())


Out[31]:
235

Fixing Data Types


In [ ]:
print df.groupby("Shift").count().index

Date and Time

Create a proper text field

In order to use the text to date/time converter our text columns need to have the appropriate format.


In [15]:
df[['occur_date', 'occur_time']]


Out[15]:
occur_date occur_time
0 09/06/2017 00:30:00
1 09/05/2017 11:15:00
2 09/06/2017 03:15:00
3 09/05/2017 17:45:00
4 09/05/2017 17:00:00
5 09/06/2017 05:30:00
6 09/06/2017 06:50:00
7 09/06/2017 06:50:00
8 09/06/2017 04:00:00
9 09/06/2017 04:00:00
10 09/03/2017 11:00:00
11 09/05/2017 21:00:00
12 09/06/2017 07:30:00
13 09/05/2017 22:00:00
14 09/06/2017 08:30:00
15 09/06/2017 09:20:00
16 09/06/2017 07:00:00
17 09/05/2017 19:30:00
18 09/06/2017 10:19:00
19 09/06/2017 10:15:00
20 09/06/2017 07:00:00
21 09/06/2017 10:30:00
22 09/06/2017 11:00:00
23 09/06/2017 11:25:00
24 08/28/2017 21:07:00
25 08/31/2017 00:00:00
26 09/06/2017 13:00:00
27 09/06/2017 13:15:00
28 09/06/2017 00:44:00
29 09/06/2017 08:00:00
... ... ...
17395 12/31/2016 10:30:00
17396 01/01/2017 10:35:00
17397 12/05/2016 00:00:00
17398 01/01/2017 19:25:00
17399 01/01/2017 14:30:00
17400 01/01/2017 18:50:00
17401 01/01/2017 20:15:00
17402 01/01/2017 20:30:00
17403 01/01/2017 21:05:00
17404 01/01/2017 21:15:00
17405 12/30/2016 23:00:00
17406 01/01/2017 21:00:00
17407 01/01/2017 22:00:00
17408 01/01/2017 20:15:00
17409 01/01/2017 23:02:00
17410 01/01/2017 23:14:00
17411 01/01/2017 23:32:00
17412 01/01/2017 13:30:00
17413 12/31/2016 22:30:00
17414 12/31/2016 19:00:00
17415 12/31/2016 19:05:00
17416 01/01/2017 15:00:00
17417 01/01/2017 14:50:00
17418 01/01/2017 15:37:00
17419 01/01/2017 14:15:00
17420 12/03/2016 08:00:00
17421 01/01/2017 17:00:00
17422 12/31/2016 20:00:00
17423 01/01/2017 16:10:00
17424 12/30/2016 22:00:00

17425 rows × 2 columns


In [17]:
# function currying
def fixdatetime(fld):
    def _fix(s):
        date_col = '%s_date' % fld
        time_col = '%s_time' % fld
        if time_col in s.index:
            return str(s[date_col])+' '+str(s[time_col])
        else:
            return str(s[date_col])+' 00:00:00'
    return _fix

df.apply(fixdatetime('occur'), axis=1)[:10]


Out[17]:
0    09/06/2017 00:30:00
1    09/05/2017 11:15:00
2    09/06/2017 03:15:00
3    09/05/2017 17:45:00
4    09/05/2017 17:00:00
5    09/06/2017 05:30:00
6    09/06/2017 06:50:00
7    09/06/2017 06:50:00
8    09/06/2017 04:00:00
9    09/06/2017 04:00:00
dtype: object

Convert Columns


In [18]:
for col in ['rpt', 'occur', 'poss']:
    datser = df.apply(fixdatetime(col), axis=1)
    df['%s_dt'%col] = pd.to_datetime(datser, format="%m/%d/%Y %H:%M:%S", errors='coerce')

In [19]:
df.head()


Out[19]:
MI_PRINX offense_id rpt_date occur_date occur_time poss_date poss_time beat apt_office_prefix apt_office_num ... Avg Day loc_type UC2 Literal neighborhood npu x y rpt_dt occur_dt poss_dt
0 7693210 172490115 09/06/2017 09/06/2017 00:30:00 09/06/2017 00:35:00 607 NaN NaN ... Wed NaN AGG ASSAULT Custer/McDonough/Guice W -84.35850 33.70839 2017-09-06 2017-09-06 00:30:00 2017-09-06 00:35:00
1 7693211 172490265 09/06/2017 09/05/2017 11:15:00 09/06/2017 02:30:00 512 NaN NaN ... Tue 99.0 LARCENY-FROM VEHICLE Downtown M -84.39736 33.74958 2017-09-06 2017-09-05 11:15:00 2017-09-06 02:30:00
2 7693212 172490322 09/06/2017 09/06/2017 03:15:00 09/06/2017 03:45:00 501 NaN NaN ... Wed 18.0 LARCENY-FROM VEHICLE Atlantic Station E -84.39776 33.79072 2017-09-06 2017-09-06 03:15:00 2017-09-06 03:45:00
3 7693213 172490390 09/06/2017 09/05/2017 17:45:00 09/06/2017 04:57:00 207 NaN NaN ... Tue 18.0 LARCENY-FROM VEHICLE Brookwood Hills E -84.39361 33.80774 2017-09-06 2017-09-05 17:45:00 2017-09-06 04:57:00
4 7693214 172490401 09/06/2017 09/05/2017 17:00:00 09/06/2017 05:00:00 203 NaN NaN ... Tue 18.0 LARCENY-FROM VEHICLE Hills Park D -84.43337 33.79848 2017-09-06 2017-09-05 17:00:00 2017-09-06 05:00:00

5 rows × 26 columns


In [20]:
df.dtypes


Out[20]:
MI_PRINX                      int64
offense_id                    int64
rpt_date                     object
occur_date                   object
occur_time                   object
poss_date                    object
poss_time                    object
beat                          int64
apt_office_prefix            object
apt_office_num               object
location                     object
MinOfucr                      int64
MinOfibr_code                object
dispo_code                   object
MaxOfnum_victims            float64
Shift                        object
Avg Day                      object
loc_type                    float64
UC2 Literal                  object
neighborhood                 object
npu                          object
x                           float64
y                           float64
rpt_dt               datetime64[ns]
occur_dt             datetime64[ns]
poss_dt              datetime64[ns]
dtype: object

Beats and Zones

The City of Atlanta is divided into 6 zones. Each with 12 to 14 beats.

Let's create a separate column for the zones:


In [21]:
df.beat[:10]


Out[21]:
0    607
1    512
2    501
3    207
4    203
5    605
6    511
7    508
8    408
9    401
Name: beat, dtype: int64

In [22]:
df['Zone'] = df['beat']//100

In [23]:
df.Zone[:4]


Out[23]:
0    6
1    5
2    5
3    2
Name: Zone, dtype: int64

In [32]:
df['UC2 Literal'].unique()


Out[32]:
array(['AGG ASSAULT', 'LARCENY-FROM VEHICLE', 'AUTO THEFT',
       'LARCENY-NON VEHICLE', 'ROBBERY-PEDESTRIAN', 'BURGLARY-NONRES',
       'BURGLARY-RESIDENCE', 'ROBBERY-COMMERCIAL', 'RAPE',
       'ROBBERY-RESIDENCE', 'HOMICIDE'], dtype=object)

In [ ]:
##df[df['UC2 Literal']=='LARCENY-FROM VEHICLE']

In [37]:
df.occur_date.min(), df.occur_date.max()


Out[37]:
('01/01/2008', '12/31/2016')

In [43]:
df['Year'] = df.rpt_dt.map(lambda d: d.year)

In [44]:
df.groupby('Year').offense_id.count()


Out[44]:
Year
2017    17425
Name: offense_id, dtype: int64

In [63]:
brdf = df[df['UC2 Literal']=='BURGLARY-RESIDENCE'].copy()

In [50]:
brdf.shape, df.shape


Out[50]:
((1711, 28), (17425, 28))

In [53]:
def gethour(d):
    return d.hour

brdf.occur_dt.map(gethour)
##brdf['occur_dt'].map(gethour)


##brdf.occur_dt.map(lambda d: d.hour)


Out[53]:
16        7
25        0
26       13
41       23
77        9
87       12
92        0
99       14
118      21
121       8
132       3
135       4
141      21
149      14
177      13
198      18
202      21
232      10
236      10
251      20
260      23
277       0
289       5
361      10
377       2
388       0
390      10
401      14
402      12
403      14
         ..
17202    12
17208    19
17214    12
17221    13
17227    14
17245     7
17255    10
17268     5
17275    14
17284    23
17286     7
17289     2
17291     3
17304    23
17306     9
17310    13
17311    14
17329     8
17338    14
17349     0
17352     6
17353     1
17356    22
17363     4
17364    23
17378    15
17383    22
17401    20
17408    20
17424    22
Name: occur_dt, dtype: int64

In [64]:
print type(brdf.occur_dt)
brdf['Hour'] = brdf.occur_dt.apply(gethour)


<class 'pandas.core.series.Series'>

In [65]:
brdf.head()


Out[65]:
MI_PRINX offense_id rpt_date occur_date occur_time poss_date poss_time beat apt_office_prefix apt_office_num ... neighborhood npu x y rpt_dt occur_dt poss_dt Zone Year Hour
16 7693226 172490896 09/06/2017 09/06/2017 07:00:00 09/06/2017 10:00:00 203 NaN 302 ... Bolton D -84.44920 33.81667 2017-09-06 2017-09-06 07:00:00 2017-09-06 10:00:00 2 2017 7
25 7693235 172491176 09/06/2017 08/31/2017 00:00:00 09/06/2017 12:54:00 312 NaN NaN ... Orchard Knob Z -84.37872 33.66754 2017-09-06 2017-08-31 00:00:00 2017-09-06 12:54:00 3 2017 0
26 7693236 172491268 09/06/2017 09/06/2017 13:00:00 09/06/2017 13:20:00 412 NaN 211 ... Ben Hill Terrace P -84.51830 33.69598 2017-09-06 2017-09-06 13:00:00 2017-09-06 13:20:00 4 2017 13
41 7693251 172491966 09/06/2017 09/05/2017 23:00:00 09/06/2017 16:00:00 412 NaN N-5 ... Kings Forest P -84.51621 33.69231 2017-09-06 2017-09-05 23:00:00 2017-09-06 16:00:00 4 2017 23
77 7693160 172480966 09/05/2017 09/05/2017 09:40:00 09/05/2017 09:50:00 301 NaN NaN ... Adair Park V -84.41022 33.73028 2017-09-05 2017-09-05 09:40:00 2017-09-05 09:50:00 3 2017 9

5 rows × 29 columns


In [71]:
br_hr = brdf.groupby(['Hour']).offense_id.count()

In [74]:
plt.step(br_hr.index, br_hr.values)


Out[74]:
[<matplotlib.lines.Line2D at 0x6031490>]

In [83]:
plt.figure(figsize=(20,8))
for z in range(1,7):
    plt.subplot(3,2,z)
    plt.title("Zone %d" % z)
    #brdf[brdf.Zone==z].hist(column='Hour', bins=24)
    plt.hist(brdf[brdf.Zone==z].Hour, bins=24)
plt.show()



In [88]:
plt.figure(figsize=(30,15))
for h in range(24):
    plt.subplot(4,6,h+1)
    plt.title("Hour %d" % h)
    #brdf[brdf.Zone==z].hist(column='Hour', bins=24)
    plt.hist(brdf[brdf.Hour==h].Zone, bins=6)
    plt.ylim(0,40) ## sets limit on Y-axis
plt.show()



In [ ]:


In [90]:
df['UC2 Literal'].unique()


Out[90]:
array(['AGG ASSAULT', 'LARCENY-FROM VEHICLE', 'AUTO THEFT',
       'LARCENY-NON VEHICLE', 'ROBBERY-PEDESTRIAN', 'BURGLARY-NONRES',
       'BURGLARY-RESIDENCE', 'ROBBERY-COMMERCIAL', 'RAPE',
       'ROBBERY-RESIDENCE', 'HOMICIDE'], dtype=object)

In [92]:
df.groupby(['UC2 Literal', 'Zone']).offense_id.count()


Out[92]:
UC2 Literal          Zone
AGG ASSAULT          1       397
                     2        96
                     3       338
                     4       309
                     5       140
                     6       114
                     7         2
AUTO THEFT           1       336
                     2       382
                     3       455
                     4       421
                     5       255
                     6       231
                     7         5
BURGLARY-NONRES      1        67
                     2       122
                     3        88
                     4        99
                     5        54
                     6        88
BURGLARY-RESIDENCE   1       414
                     2       225
                     3       324
                     4       429
                     5        60
                     6       259
HOMICIDE             1        15
                     2         6
                     3        14
                     4         6
                            ... 
LARCENY-NON VEHICLE  4       596
                     5       785
                     6       738
                     7       142
RAPE                 1        36
                     2        19
                     3        29
                     4        25
                     5        24
                     6        14
                     7         1
ROBBERY-COMMERCIAL   1         9
                     2        23
                     3        16
                     4        24
                     5         7
                     6        13
ROBBERY-PEDESTRIAN   1       143
                     2        89
                     3       148
                     4       142
                     5       114
                     6       121
                     7         1
ROBBERY-RESIDENCE    1        23
                     2         6
                     3        21
                     4        22
                     5         5
                     6         9
Name: offense_id, dtype: int64

In [93]:
df['dayofweek'] = df.occur_dt.map(lambda d: d.dayofweek)

In [97]:
df.groupby(['UC2 Literal','dayofweek']).offense_id.count()


Out[97]:
UC2 Literal          dayofweek
AGG ASSAULT          0.0          189
                     1.0          188
                     2.0          181
                     3.0          178
                     4.0          216
                     5.0          205
                     6.0          239
AUTO THEFT           0.0          276
                     1.0          269
                     2.0          294
                     3.0          268
                     4.0          275
                     5.0          374
                     6.0          329
BURGLARY-NONRES      0.0           80
                     1.0           66
                     2.0           76
                     3.0           74
                     4.0           62
                     5.0           82
                     6.0           78
BURGLARY-RESIDENCE   0.0          250
                     1.0          260
                     2.0          227
                     3.0          252
                     4.0          282
                     5.0          249
                     6.0          191
HOMICIDE             0.0            6
                     1.0           10
                                 ... 
LARCENY-NON VEHICLE  5.0          614
                     6.0          513
RAPE                 0.0           16
                     1.0           19
                     2.0           18
                     3.0           18
                     4.0           19
                     5.0           26
                     6.0           32
ROBBERY-COMMERCIAL   0.0           20
                     1.0           19
                     2.0           14
                     3.0           10
                     4.0           11
                     5.0            7
                     6.0           11
ROBBERY-PEDESTRIAN   0.0           97
                     1.0          124
                     2.0          108
                     3.0           93
                     4.0           81
                     5.0          132
                     6.0          123
ROBBERY-RESIDENCE    0.0           16
                     1.0           17
                     2.0           10
                     3.0           11
                     4.0            8
                     5.0            8
                     6.0           16
Name: offense_id, dtype: int64

In [58]:
brdf.apply(lambda r: str(r.location)+', '+str(r.npu), axis=1)


Out[58]:
16               2232 DUNSEATH AVE NW, D
25                   454 OAK DRIVE SE, Z
26               2130 SOUTHWOOD CV SW, P
41             3901 CAMPBELLTON RD SW, P
77                    904 TIFT AVE SW, V
87             5110 ARBOR GATES DR NE, B
92                     708 JETT ST NW, L
99                   87 E LAKE TER SE, O
118               266 JOSEPHINE ST NE, N
121           870 MAYSON TURNER RD NW, L
132                775 ARGONNE AVE NE, E
135                3247 ROSWELL RD NE, B
141           445 CLEVELAND AVENUE SE, Z
149            2512 CAMPBELLTON RD SW, R
177             985 BUCKINGHAM CIR NW, A
198                  1041 STATE ST NW, E
202                    899 PARK ST SW, T
232            1870 CAMPBELLTON RD SW, R
236                 1935 ALISON CT SW, R
251                 58 SHERIDAN DR NE, B
260              1660 PEACHTREE ST NW, E
277      262 ARTHUR LANGFORD JR PL SW, Y
289               370 NORTHSIDE DR NW, L
361                  600 GARSON DR NE, B
377              1539 LAKEWOOD AVE SE, Y
388                     1051 FERN AVE, V
390                 641 HANSELL ST SE, W
401                     271 OAK DR SE, Z
402                  226 CORLEY ST NE, M
403                 321 LAQUITA DR SE, Y
                      ...               
17202               877 CUSTER AVE SE, W
17208               445 MARKHAM ST SW, M
17214             144 MORELAND AVE NE, N
17221                520 FULTON ST SW, V
17227               1881 MYRTLE DR SW, R
17245             863 KIRKWOOD AVE SE, N
17255            2232 DUNSEATH AVE NW, D
17268       740 SIDNEY MARCUS BLVD NE, B
17275                352 ELVIRA ST SE, Y
17284    3725 PRINCETON LAKES PKWY SW, P
17286                  610 AMAL DR SW, Y
17289               741 MOROSGO DR NE, B
17291                507 BISHOP ST NW, E
17304                 453 UTOY CIR SW, H
17306             597 TARRAGON WAY SW, H
17310                   978 BRUCE CIR, W
17311                385 LEGACY DR SW, T
17329               2000 MONROE PL NE, F
17338               2000 MONROE PL NE, F
17349           2778 DALE CREEK DR NW, I
17352             3432 PIEDMONT RD NE, B
17353               200 CLIFTON ST SE, O
17356          565 GREENSFERRY AVE SW, T
17363             308 WOODWARD AVE SE, W
17364                  880 ROCK ST NW, L
17378            1070 ORMEWOOD AVE SE, W
17383          1829 CAMPBELLTON RD SW, S
17401               2500 CENTER ST NW, G
17408             1094 DECKNER AVE SW, X
17424            2591 ETHERIDGE DR NW, G
dtype: object

In [60]:
brdf.apply(np.min, axis=0)


Out[60]:
MI_PRINX                              7652723
offense_id                          160821939
rpt_date                           01/01/2017
occur_date                         01/01/2016
occur_time                           00:00:00
poss_date                          01/01/2017
poss_time                            00:00:00
beat                                      101
apt_office_prefix                         inf
apt_office_num                            inf
location             1 PEACHTREE HILLS AVE NE
MinOfucr                                  511
MinOfibr_code                            2202
dispo_code                                inf
MaxOfnum_victims                            0
Shift                                     Day
Avg Day                                   Fri
loc_type                                    2
UC2 Literal                BURGLARY-RESIDENCE
neighborhood                              inf
npu                                       inf
x                                    -84.5465
y                                     33.6541
rpt_dt                    2017-01-01 00:00:00
occur_dt                  2016-01-01 00:00:00
poss_dt                   2016-11-01 22:00:00
Zone                                        1
Year                                     2017
dtype: object

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
df.occur_dt.map(lambda d: d.year).unique()

In [ ]:
df['Year'] = df.occur_dt.map(lambda d: d.year)
df2 = df[(df.Year>=2010) & (df.Year<=2017)]
df2.shape, df.shape

In [ ]:


In [ ]:


In [ ]:
df_LarcenyFromVehicle = df2[(df2['UC2 Literal']=='LARCENY-FROM VEHICLE')&(df2.Year==2017)].copy()
agr_LarcenyFromVehicle = df_LarcenyFromVehicle.set_index('occur_dt').resample('W').offense_id.count()
agr_LarcenyFromVehicle

In [ ]:
df_LarcenyFromVehicle["Hour"] = df_LarcenyFromVehicle.occur_dt.map(lambda d: d.hour)

In [ ]:
df_LarcenyFromVehicle.groupby("Hour").offense_id.count()

In [ ]:
hourly = df_LarcenyFromVehicle.resample('H', on='occur_dt').offense_id.count()

In [ ]:
hourly.reset_index().occur_dt.map(lambda d: d.week)

In [ ]:
df3 = pd.DataFrame({"N": hourly})
##df3['Day'] = df3.reset_index().occur_dt ##.map(lambda d: d.day)
df3

In [ ]:
ls

In [ ]:
fig = plt.figure(figsize=(10,6)) # 10inx10in
#plt.plot(resdf['BURGLARY-RESIDENCE'].index, resdf['BURGLARY-RESIDENCE'])
plt.scatter(resdf['BURGLARY-RESIDENCE'].index, resdf['BURGLARY-RESIDENCE'], marker='x')
plt.scatter(resdf['BURGLARY-NONRES'].index, resdf['BURGLARY-NONRES'], marker='o')

plt.ylim(0, 500)
plt.title('BURGLARY-RESIDENCE')
plt.xticks(range(13), ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
fig.savefig('BurglaryResidence_over_month.svg')
x = 1

In [ ]:
def getTheMonth(x):
    return x.month

df['occur_month'] = df['occur_ts'].map(getTheMonth)

In [ ]:
resdf = df.groupby(['UC2 Literal', 'occur_month']).offense_id.count()
fig = plt.figure(figsize=(10,6))
plt.scatter(resdf['BURGLARY-RESIDENCE'].index, resdf['BURGLARY-RESIDENCE'], marker='x')
plt.ylim(0, 500)
plt.title('BURGLARY-RESIDENCE')
plt.xticks(range(13), ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.savefig('quiz3-burglary-residence.png')

Seasonal Model


In [ ]:
fig = plt.figure(figsize=(40,30))
crime_types = crime_year.index.levels[0]
years = crime_year.index.levels[1]
for c in range(len(crime_types)):
    y_max = max(crime_year.loc[crime_types[c]])
    
    plt.subplot(4,3,c+1)
    plt.hlines(crime_year.loc[crime_types[c]].iloc[-1]*100/y_max, years[0], years[-1], linestyles="dashed", color="r")
    plt.bar(crime_year.loc[crime_types[c]].index, crime_year.loc[crime_types[c]]*100/y_max, label=crime_types[c], alpha=0.5)
    ##plt.legend()
    plt.ylim(0, 100)
    plt.xticks(years+0.4, [str(int(y)) for y in years], rotation=0, fontsize=24)
    plt.yticks([0,20,40,60,80,100], ['0%','20%','40%','60%','80%','100%'], fontsize=24)
    plt.title(crime_types[c], fontsize=30)
    None

In [ ]:
c = 3 ## 'BURGLARY-RESIDENCE'
resburglaries = crime_year_month.loc[crime_types[c]]
fig = plt.figure(figsize=(20,10))
for y in years:
    plt.plot(resburglaries.loc[y].index, resburglaries.loc[y], label=("%4.0f"%y))
plt.legend()
plt.title("Seasonal Trends - %s"%crime_types[c], fontsize=20)
plt.xticks(range(13), ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.xlim(0,13)
None

In [ ]:
c = 3 ## 'BURGLARY-RESIDENCE'
fig = plt.figure(figsize=(20,10))
for y in years:
    avg = resburglaries.loc[y].mean()
    std = resburglaries.loc[y].std()
    ##plt.hlines(avg, 1, 13, linestyle='dashed')
    plt.plot(resburglaries.loc[y].index, (resburglaries.loc[y]-avg)/std, label=("%4.0f"%y))
plt.legend()
plt.title("Seasonal Trends - %s (normalized)"%crime_types[c], fontsize=20)
plt.xticks(list(range(1,13)), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.xlim(0,13)
plt.ylabel("Standard deviations $\sigma_y$")
None

In [ ]:


In [ ]:


In [ ]: