In [1]:
### Notebook-wide display tweak: let cells use the full browser width.
# `IPython.core.display` is a private path deprecated since IPython 7.14;
# the supported public location is `IPython.display`.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
The Atlanta Police Department provides Part 1 crime data at http://www.atlantapd.org/i-want-to/crime-data-downloads
A recent copy of the data file is stored in the cluster. Please, do not copy this data file into your home directory!
In [1]:
### Load libraries (all imports for the notebook live in this cell)
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
Load data (don't change this if you're running the notebook on the cluster)
We have two files:
- /home/data/APD/COBRA083016_2015.xlsx — data for 2015 only
- /home/data/APD/COBRA083016.xlsx — data from 2009 to the current date
In [2]:
%%time
df = pd.read_excel('/home/data/APD/COBRA083016_2015.xlsx', sheetname='Query')
In [3]:
df.shape  # (rows, columns) of the 2015 extract
Out[3]:
In [4]:
# Print one column name per line for a quick schema overview.
for c in df.columns:
    print(c)
In [5]:
df.head()  # first five records for a content sanity check
Out[5]:
In [24]:
# Keep a narrow working frame with only the columns we need below.
# `.copy()` makes it an independent DataFrame, so the index assignment in the
# next cell cannot trigger pandas' SettingWithCopyWarning / chained-assignment
# ambiguity on a slice of `df`.
dfshort = df[['offense_id','occur_date','occur_time','UC2 Literal']].copy()
In [26]:
# Index the short frame by time-of-day of each offense.
# NOTE(review): TimedeltaIndex parses occur_time values as offsets from
# midnight -- presumably they are 'HH:MM:SS' strings; confirm against the raw file.
dfshort.index = pd.TimedeltaIndex(list(dfshort.occur_time))
In [27]:
dfshort.head()  # confirm the new time-based index
Out[27]:
In [7]:
df.offense_id.min(), df.offense_id.max()  # range of offense ids in the extract
Out[7]:
In [11]:
df.columns  # full column list after loading
Out[11]:
In [8]:
# Offense counts per (crime type, neighborhood) pair.
grouped_by_type_and_hood = df.groupby(['UC2 Literal', 'neighborhood'])
crime_summary = grouped_by_type_and_hood['offense_id'].count()
In [13]:
crime_summary.index  # two-level MultiIndex: (UC2 Literal, neighborhood)
Out[13]:
In [10]:
crime_summary.reset_index().head(20)  # flatten the MultiIndex into columns for display
Out[10]:
In [ ]:
In [ ]:
In [ ]:
In [15]:
# Derive the APD zone from the beat number.
# NOTE(review): assumes the hundreds digit of `beat` encodes the zone
# (e.g. beat 213 -> zone 2) -- confirm against APD beat documentation.
df["Zone"] = df.beat // 100
In [16]:
df  # full-frame display; prefer df.head() to keep notebook output small
Out[16]:
In [11]:
df[['offense_id', 'occur_date', 'occur_time', 'rpt_date']][1:10]  # rows 1-9, date/time columns only
Out[11]:
Convert into date-time type
In [12]:
# Build a single timestamp from the separate date and time text columns.
df['occur_ts'] = pd.to_datetime(df.occur_date+' '+df.occur_time)
In [19]:
#df[['offense_id', 'occur_date', 'occur_time', 'occur_ts', 'rpt_date']][1:10]
In [ ]:
In [20]:
# NOTE(review): duplicate of cell In [12] above -- harmless but redundant.
df['occur_ts'] = pd.to_datetime(df.occur_date+' '+df.occur_time)
In [21]:
# Month and ISO week-of-year of each offense.
# `.dt.month` is the vectorized equivalent of mapping `lambda x: x.month`,
# and `Series.dt.weekofyear` was deprecated in pandas 1.1 and removed in 2.0;
# `dt.isocalendar().week` is the supported replacement (same ISO week numbers,
# returned as a nullable UInt32 column).
df['occur_month'] = df['occur_ts'].dt.month
df['occur_woy'] = df.occur_ts.dt.isocalendar().week
In [22]:
df.describe()  # summary statistics for the numeric columns
Out[22]:
In [23]:
df.shape  # shape after adding the derived columns
Out[23]:
In [24]:
df.columns
Out[24]:
In [13]:
df.iloc[9]  # one full record for spot-checking
Out[13]:
In [27]:
#resdf.index
In [28]:
# Scalar lookup in a MultiIndex takes a tuple key; a list selects two
# independent first-level labels ('AUTO_THEFT' and 6) instead of the single
# ('AUTO_THEFT', 6) row.
# NOTE(review): `resdf` (defined below -- this cell ran out of order) is grouped
# by occur_date, so the second key should be a date string, not 6 -- confirm
# which grouping was intended here.
resdf.loc[('AUTO_THEFT', 6)]
In [16]:
# Daily offense counts for each crime type: (UC2 Literal, occur_date) -> count.
by_type_and_day = df.groupby(['UC2 Literal', 'occur_date'])
resdf = by_type_and_day['offense_id'].count()
resdf  # display the multi-indexed series
Out[16]:
In [17]:
# `.as_matrix()` was deprecated in pandas 0.23 and removed in 1.0;
# `.to_numpy()` is the supported way to get the underlying ndarray.
resdf['BURGLARY-RESIDENCE'].to_numpy()
Out[17]:
In [19]:
# `.iloc(0)` merely returns the indexer object itself; positional access uses
# square brackets.  This yields the first daily count for the type.
resdf['BURGLARY-RESIDENCE'].iloc[0]
Out[19]:
In [22]:
resdf['BURGLARY-RESIDENCE']  # daily counts for residential burglary only
Out[22]:
In [25]:
%matplotlib inline
fig = plt.figure(figsize=(10,6)) # 10inx10in
#plt.plot(resdf['BURGLARY-RESIDENCE'].index, resdf['BURGLARY-RESIDENCE'])
plt.scatter(resdf['BURGLARY-RESIDENCE'].index, resdf['BURGLARY-RESIDENCE'].values, marker='x')
# plt.scatter(resdf['BURGLARY-NONRES'].index, resdf['BURGLARY-NONRES'], marker='o')
# plt.ylim(0, 500)
# plt.title('BURGLARY-RESIDENCE')
# plt.xticks(range(13), ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
# fig.savefig('BurglaryResidence_over_month.svg')
# x = 1
In [ ]:
def getTheMonth(x):
    """Return the calendar month (1-12) of a single timestamp.

    Named-function equivalent of the ``lambda x: x.month`` used earlier,
    for readability when passed to ``Series.map``.
    """
    return x.month

# Re-derive the month column using the named function.
df['occur_month'] = df['occur_ts'].map(getTheMonth)
In [ ]:
# Self-contained reload: read the 2015 extract and derive the timestamp fields
# in one cell, so the plotting cell below works from a fresh kernel.
# `sheetname=` was removed in pandas 1.0 (use `sheet_name=`), and
# `dt.weekofyear` was removed in pandas 2.0 (use `dt.isocalendar().week`).
df = pd.read_excel('/home/data/APD/COBRA083016_2015.xlsx', sheet_name='Query')
df['occur_ts'] = pd.to_datetime(df.occur_date+' '+df.occur_time)
df['occur_month'] = df['occur_ts'].dt.month
df['occur_woy'] = df.occur_ts.dt.isocalendar().week
In [ ]:
%matplotlib inline
resdf = df.groupby(['UC2 Literal', 'occur_month']).offense_id.count()
fig = plt.figure(figsize=(10,6))
plt.scatter(resdf['BURGLARY-RESIDENCE'].index, resdf['BURGLARY-RESIDENCE'], marker='x')
plt.ylim(0, 500)
plt.title('BURGLARY-RESIDENCE')
plt.xticks(range(13), ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.savefig('quiz3-burglary-residence.png')
''
In [ ]:
# NOTE(review): with the inline backend the figure is closed once the previous
# cell finishes, so calling savefig here likely writes an empty canvas --
# save inside the plotting cell instead.  Confirm before relying on this file.
plt.savefig('quiz3-burglary-residence.png')
In [28]:
# `pd.unique` expects a 1-D array-like and raises on a whole DataFrame.
# To answer the question in the original comment -- how to get duplicate
# records -- use DataFrame.duplicated; keep=False marks every copy of a
# duplicated row, not just the later occurrences.
df[df.duplicated(keep=False)]
In [29]:
pd.unique(df['UC2 Literal'])  # the distinct Part-1 crime categories
Out[29]:
In [30]:
len(pd.unique(df.MI_PRINX))  # count of distinct MI_PRINX keys (unique-record check)
Out[30]:
In [ ]:
## Load the complete dataset (2009 to current date).
# `sheetname=` was removed in pandas 1.0; the supported keyword is `sheet_name=`.
dff = pd.read_excel('/home/data/APD/COBRA083016.xlsx', sheet_name='Query')
In [ ]:
dff.shape  # size of the full 2009-present extract
In [ ]:
# Build proper timestamps from the split date/time text columns, for both the
# occurrence and possession events; the report date has no time component.
for prefix in ('occur', 'poss'):
    date_col = dff[prefix + '_date']
    time_col = dff[prefix + '_time']
    dff[prefix + '_ts'] = pd.to_datetime(date_col + ' ' + time_col)
dff['rpt_ts'] = pd.to_datetime(dff.rpt_date)
In [ ]:
', '.join(dff.columns)  # one-line view of all column names
In [ ]:
# Calendar fields derived from the occurrence timestamp.
occur = dff.occur_ts.dt
dff['occur_year'] = occur.year
dff['occur_month'] = occur.month
dff['occur_dayweek'] = occur.dayofweek
In [ ]:
# Annual counts per crime type, restricted to complete years (2009-2015).
crime_year = dff[dff.occur_year.between(2009, 2015)].groupby(by=['UC2 Literal', 'occur_year']).offense_id.count()
In [ ]:
%matplotlib inline
# One bar chart per crime type, each scaled to its own peak year (y axis is
# % of that type's maximum), with a dashed red line marking the most recent
# year's relative level.
fig = plt.figure(figsize=(40,30))
crime_types = crime_year.index.levels[0]
years = crime_year.index.levels[1]
for c in range(len(crime_types)):
    y_max = max(crime_year.loc[crime_types[c]])
    plt.subplot(4,3,c+1)
    # dashed line at the last (most recent) year's relative level
    plt.hlines(crime_year.loc[crime_types[c]].iloc[-1]*100/y_max, years[0], years[-1], linestyles="dashed", color="r")
    plt.bar(crime_year.loc[crime_types[c]].index, crime_year.loc[crime_types[c]]*100/y_max, label=crime_types[c], alpha=0.5)
    ##plt.legend()
    plt.ylim(0, 100)
    # NOTE(review): the +0.4 tick offset assumes matplotlib < 2.0 left-aligned
    # bars; newer matplotlib centers bars, so the offset would mislabel them.
    plt.xticks(years+0.4, [str(int(y)) for y in years], rotation=0, fontsize=24)
    plt.yticks([0,20,40,60,80,100], ['0%','20%','40%','60%','80%','100%'], fontsize=24)
    plt.title(crime_types[c], fontsize=30)
None
Let's look at residential burglary.
In [ ]:
c = 3  # NOTE(review): assumes crime_types[3] is 'BURGLARY-RESIDENCE' -- fragile if categories change
crime_types[c]  # display the selected crime type
In [ ]:
# Counts per (type, year, month) for the seasonal analysis, complete years only.
crime_year_month = dff[dff.occur_year.between(2009, 2015)].groupby(by=['UC2 Literal', 'occur_year', 'occur_month']).offense_id.count()
In [ ]:
c = 3 ## 'BURGLARY-RESIDENCE'
# Overlay the monthly counts of each year to expose the seasonal pattern.
resburglaries = crime_year_month.loc[crime_types[c]]
fig = plt.figure(figsize=(20,10))
for y in years:
    plt.plot(resburglaries.loc[y].index, resburglaries.loc[y], label=("%4.0f"%y))
plt.legend()
plt.title("Seasonal Trends - %s"%crime_types[c], fontsize=20)
plt.xticks(range(13), ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.xlim(0,13)
None  # suppress the cell's text output
Normalized over the annual average
In [ ]:
c = 3 ## 'BURGLARY-RESIDENCE'
# Same seasonal overlay, with each year's annual mean drawn as a dashed line.
fig = plt.figure(figsize=(20,10))
for y in years:
    avg = resburglaries.loc[y].mean()
    plt.hlines(avg, 1, 13, linestyle='dashed')  # annual average for year y
    plt.plot(resburglaries.loc[y].index, resburglaries.loc[y], label=("%4.0f"%y))
plt.legend()
# NOTE(review): 'annuale' in the title string is a typo for 'annual'.
plt.title("Seasonal Trends - %s (with annuale averages)"%crime_types[c], fontsize=20)
plt.xticks(list(range(1,13)), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.xlim(0,13)
None
In [ ]:
c = 3 ## 'BURGLARY-RESIDENCE'
# Seasonal overlay with each year standardized (z-scores): subtract the year's
# mean and divide by its standard deviation so years are directly comparable.
fig = plt.figure(figsize=(20,10))
for y in years:
    avg = resburglaries.loc[y].mean()
    std = resburglaries.loc[y].std()
    plt.plot(resburglaries.loc[y].index, (resburglaries.loc[y]-avg)/std, label=("%4.0f"%y))
plt.legend()
plt.title("Seasonal Trends - %s (normalized)"%crime_types[c], fontsize=20)
plt.xticks(list(range(1,13)), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.xlim(0,13)
# Raw string: "\s" in a plain literal is an invalid escape sequence
# (DeprecationWarning since Python 3.6, SyntaxWarning in 3.12+).
plt.ylabel(r"Standard deviations $\sigma_y$")
None
In [ ]:
In [ ]:
# Average monthly count across all years for this crime type -- the basis for
# a per-calendar-month seasonal-adjustment factor.
seasonal_adjust = resburglaries.reset_index().groupby(by=['occur_month']).offense_id.agg('mean')
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Suppose there are $n$ data points $(x_i, y_i)$, $i = 1, \ldots, n$. The function that describes $x$ and $y$ is:

$$y_i = \alpha + \beta x_i + \varepsilon_i.$$

The goal is to find the equation of the straight line

$$y = \alpha + \beta x,$$

which would provide a "best" fit for the data points. Here the "best" will be understood as in the least-squares approach: a line that minimizes the sum of squared residuals of the linear regression model. In other words, $\alpha$ (the $y$-intercept) and $\beta$ (the slope) solve the following minimization problem:

$$\text{Find }\min_{\alpha,\,\beta} Q(\alpha, \beta), \qquad \text{for } Q(\alpha, \beta) = \sum_{i=1}^n\varepsilon_i^{\,2} = \sum_{i=1}^n (y_i - \alpha - \beta x_i)^2.$$

By using either calculus, the geometry of inner product spaces, or simply expanding to get a quadratic expression in $\alpha$ and $\beta$, it can be shown that the values of $\alpha$ and $\beta$ that minimize the objective function $Q$ (Kenney, J. F. and Keeping, E. S. (1962) "Linear Regression and Correlation." Ch. 15 in *Mathematics of Statistics*, Pt. 1, 3rd ed. Princeton, NJ: Van Nostrand, pp. 252–285) are

$$\hat\beta = \frac{\sum_{i=1}^n (x_i - \bar x)(y_i - \bar y)}{\sum_{i=1}^n (x_i - \bar x)^2} = r_{xy}\,\frac{s_y}{s_x}, \qquad \hat\alpha = \bar y - \hat\beta\,\bar x,$$

where $r_{xy}$ is the sample correlation coefficient between $x$ and $y$, and $s_x$ and $s_y$ are the sample standard deviations of $x$ and $y$. A horizontal bar over a quantity indicates the average value of that quantity. For example:

$$\bar x = \frac{1}{n}\sum_{i=1}^n x_i.$$

Substituting the above expressions for $\hat\alpha$ and $\hat\beta$ into

$$f = \hat\alpha + \hat\beta x$$

yields

$$\frac{f - \bar y}{s_y} = r_{xy}\,\frac{x - \bar x}{s_x}.$$

This shows that $r_{xy}$ is the slope of the regression line of the standardized data points (and that this line passes through the origin).

It is sometimes useful to calculate $r_{xy}$ from the data independently using this equation:

$$r_{xy} = \frac{\overline{xy} - \bar x\,\bar y}{\sqrt{\left(\overline{x^2} - \bar x^2\right)\left(\overline{y^2} - \bar y^2\right)}}.$$

The coefficient of determination ($R$ squared) is equal to $r_{xy}^2$ when the model is linear with a single independent variable. See the sample correlation coefficient for additional details.

### Linear regression without the intercept term

Sometimes it is appropriate to force the regression line to pass through the origin, because $x$ and $y$ are assumed to be proportional. For the model without the intercept term, $y = \beta x$, the OLS estimator for $\beta$ simplifies to

$$\hat\beta = \frac{\sum_{i=1}^n x_i y_i}{\sum_{i=1}^n x_i^2}.$$

Substituting $(x - h,\, y - k)$ in place of $(x,\, y)$ gives the regression through $(h, k)$:

$$\hat\beta = \frac{\sum_{i=1}^n (x_i - h)(y_i - k)}{\sum_{i=1}^n (x_i - h)^2}.$$

The last form above demonstrates how moving the line away from the center of mass of the data points affects the slope.
In [29]:
### in case we want to save a DataFrame
#writer = pd.ExcelWriter('myresults.xlsx')
#df.to_excel(writer,'Results')
#writer.save()
In [30]:
#resdf
In [ ]:
In [ ]:
In [ ]:
In [ ]: