notebook.community

Edit and run



In [1]:

    
import pandas as pd
import re
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib



In [16]:

    
df = pd.read_csv('./raw.csv')
df = df.drop('Unnamed: 0', axis=1)



In [17]:

    
def remove_extra_spaces(string):
    string = re.sub( '-', ' ', string ).strip()
    string = re.sub( '\s+', ' ', string ).strip()
    return string

df['Incident'] = df['Incident'].map(remove_extra_spaces)
df['Address'] = df['Address'].map(remove_extra_spaces)



In [18]:

    
def add_three(cat, desc=None):
    cats[cat] = [ cat, 0]
    cats[cat + ' 1ST DEGREE'] = [cat, 1]
    cats[cat + ' 2ND DEGREE'] = [cat, 2]
    cats[cat + ' 3RD DEGREE'] = [cat, 3]
    cats[cat + ' 1ST'] = [cat, 1]
    cats[cat + ' 2ND'] = [cat, 2]
    cats[cat + ' 3RD'] = [cat, 3]
    cats[cat + ' 1'] = [cat, 1]
    cats[cat + ' 2'] = [cat, 2]
    cats[cat + ' 3'] = [cat, 3]
    
cats = {
 'DRIVING UNDER INFLUENCE ALCOHOL': 
     ['DRIVING UNDER INFLUENCE', None],
 'DRIVING UNDER INFLUENCE DRUGS': 
     ['DRIVING UNDER INFLUENCE', None],
 'DRIVING UNDER THE INFLUENCE (ALCOHOL)': 
     ['DRIVING UNDER INFLUENCE', None],
 'DV 3RD HARASSMENT FAMILY': 
     ['DOMESTIC VIOLENCE', 3],
 'DV 3RD SIMPLE ASSAULT FAMILY': 
     ['DOMESTIC VIOLENCE', 3],
 'DV 3RD CRIMINAL MISCHIEF 3RD': 
     ['DOMESTIC VIOLENCE', 3],
 'DV 3RD HARASSING COMMUNICATIONS': 
     ['DOMESTIC VIOLENCE', 3],
 'HARASSMENT': 
     ['HARASSMENT', None],
 'HARASSING COMMUNICATIONS': 
     ['HARASSMENT', None],
 'ILLEGAL POSS/USE CREDIT CARD': 
     ['ILLEGAL POSS/USE CREDIT CARD', None],
 'UNLAWFUL BREAKING AND ENTERING A VEHCILE': 
     ['BREAKING AND ENTERING A VEHICLE', None],
 'UNLAWFUL BREAKING AND ENTERING A VEHICLE': 
     ['BREAKING AND ENTERING A VEHICLE', None],
 'BREAKING AND ENTERING A VEHICLE': 
     ['BREAKING AND ENTERING A VEHICLE', None],
 'UNLAWFUL BREAKING AND ENTERING A VEHICLE (NO THEFT)': 
     ['BREAKING AND ENTERING A VEHICLE', None],
 'UNAUTHORIZED USE OF AUTO': 
     ['UNAUTHORIZED USE', None],
 'UNAUTHORIZED USE OTHER VEHICLE': 
     ['UNAUTHORIZED USE', None],
 'DOMESTIC VIOLENCE BY STRANGULATION OR SUFFOCATION INJURY': 
     ['DOMESTIC VIOLENCE', None],
 'DOMESTIC VIOLENCE BY STRANGULATION OR SUFFOCATION': 
     ['DOMESTIC VIOLENCE', None],
 'DRIVING UNDER THE INFLUENCE (UNDER 21 YEARS)':
     ['DRIVING UNDER INFLUENCE', None],
 'DRIVING UNDER THE INFLUENCE (ALC. & CON. SUBS.)':
     ['DRIVING UNDER INFLUENCE', None],
 'CRUELTY TO ANIMALS':
     ['ANIMAL', None],
 'HARBORING VICIOUS OR DANGEROUS ANIMAL':
     ['ANIMAL', None],
 'ASSAULT':
     ['ASSAULT', None],
 'SIMPLE ASSAULT':
     ['ASSAULT', None],
 'CARRYING CONCEALED WEAPON':
     ['WEAPON', None],
 'FIREARMS LICENSE REQUIRED':
     ['WEAPON', None],
 'LICENSE TO CARRY PISTOL':
     ['WEAPON', None],
 'ALTER FIREARM ID NUMBER':
     ['WEAPON', None],
 'BRASS KNUCKLES':
     ['WEAPON', None],
 
 }

threes = [
  'ARSON',
  'AUTO THEFT', 
  'BREAKING AND ENTERING VEHICLE',
  'BURGLARY', 
  'CRIMINAL MISCHIEF',
  'CRIMINAL TAMPERING', 
  'CRIMINAL TRESPASS',
  'DOMESTIC VIOLENCE',
  'ELDER ABUSE AND NEGLECT',
  'ESCAPE',
  'EXTORTION',
  'FORGERY',
  'FINANCIAL EXPLOITATION OF ELDERLY PERSON',
  'IDENTITY THEFT',
  'RECEIVING STOLEN PROPERTY',
  'ROBBERY',
  'THEFT',
  'THEFT OF OTHER VEHICLES',
  'THEFT OF LOST PROPERTY',
  'THEFT OF SERVICES',
  'POSS. FORGED INSTRUMENT',
  'PROMOTING CONTRABAND',
  'STALKING',
  'HINDERING PROSECUTION',
  'PROMOTING PROSTITUTION',
  'PERJURY',
  'THEFT & SALE',
]

for three in threes:
    add_three(three)

others_deg = { 
  'AGGRAVATED ASSAULT': 
      ['ASSAULT', 'AGGRAVATED, '],
  'ROBBERY': 
      ['ROBBERY', ''],
  'DV': 
      ['DOMESTIC VIOLENCE', ''],
  'EXTORTION': 
      ['EXTORTION', ''],
  'ATTEMPTED BURGLARY': 
      ['ATTEMPTED BUGLARY', ''],
}

others = [
  'DISCHARGING FIREARM',
  'DUTY UPON STRIKING',
  'MURDER',
  'ADULT SEX OFFENDER',
  'ANIMAL',
  'ATTEMPTED MURDER',
  ]
    
def fix(string):
    if cats.has_key(string):
        return cats[string]
        
    for (k,v) in others_deg.iteritems():
        for (i, s) in enumerate(['1ST', '2ND', '3RD']):
            if string.find(k + ' ' + s) == 0:
                return [v[0], i]
    
    for k in others:
        if string.find(k) == 0:
            return [k, None]
    return ['OTHER', None]



In [19]:

    
new_data = pd.DataFrame(list(df['Incident'].map(fix)), columns=['cat', 'deg'])
new_data = df.join(new_data)
new_data.groupby('cat').count()

#d = new_data[pd.isnull(new_data['cat'])]
#grouped = d.groupby('Incident')
#c = grouped.count()
#v = c.sort('Address', ascending=False)['Incident']
#print v.to_string()









    Out[19]:






  
    
      
      Address
      Case
      DateTime
      File
      Incident
      NumInc
      Shift
      cat
      deg
    
    
      cat
      
      
      
      
      
      
      
      
      
    
  
  
    
      ADULT SEX OFFENDER
          7
          7
          7
          7
          7
          7
          7
          7
          0
    
    
      ANIMAL
         67
         67
         67
         67
         67
         67
         67
         67
          0
    
    
      ARSON
          5
          5
          5
          5
          5
          5
          5
          5
          5
    
    
      ASSAULT
        270
        270
        270
        270
        270
        270
        270
        270
         86
    
    
      ATTEMPTED BUGLARY
         13
         13
         13
         13
         13
         13
         13
         13
         13
    
    
      ATTEMPTED MURDER
          7
          7
          7
          7
          7
          7
          7
          7
          0
    
    
      AUTO THEFT
        161
        161
        161
        161
        161
        161
        161
        161
        161
    
    
      BREAKING AND ENTERING A VEHICLE
       1502
       1502
       1502
       1502
       1502
       1502
       1502
       1502
          0
    
    
      BREAKING AND ENTERING VEHICLE
          2
          2
          2
          2
          2
          2
          2
          2
          2
    
    
      BURGLARY
       1125
       1125
       1125
       1125
       1125
       1125
       1125
       1125
       1125
    
    
      CRIMINAL MISCHIEF
       1158
       1158
       1158
       1158
       1158
       1158
       1158
       1158
       1158
    
    
      CRIMINAL TAMPERING
         40
         40
         40
         40
         40
         40
         40
         40
         40
    
    
      CRIMINAL TRESPASS
        173
        173
        173
        173
        173
        173
        173
        173
        173
    
    
      DISCHARGING FIREARM
         28
         28
         28
         28
         28
         28
         28
         28
          0
    
    
      DOMESTIC VIOLENCE
       1864
       1864
       1864
       1864
       1864
       1864
       1864
       1864
       1813
    
    
      DRIVING UNDER INFLUENCE
         28
         28
         28
         28
         28
         28
         28
         28
          0
    
    
      DUTY UPON STRIKING
        197
        197
        197
        197
        197
        197
        197
        197
          0
    
    
      ELDER ABUSE AND NEGLECT
          3
          3
          3
          3
          3
          3
          3
          3
          3
    
    
      ESCAPE
         10
         10
         10
         10
         10
         10
         10
         10
         10
    
    
      EXTORTION
          2
          2
          2
          2
          2
          2
          2
          2
          2
    
    
      FINANCIAL EXPLOITATION OF ELDERLY PERSON
          4
          4
          4
          4
          4
          4
          4
          4
          4
    
    
      FORGERY
         94
         94
         94
         94
         94
         94
         94
         94
         94
    
    
      HARASSMENT
       1296
       1296
       1296
       1296
       1296
       1296
       1296
       1296
          0
    
    
      HINDERING PROSECUTION
          7
          7
          7
          7
          7
          7
          7
          7
          7
    
    
      IDENTITY THEFT
        445
        445
        445
        445
        445
        445
        445
        445
        445
    
    
      ILLEGAL POSS/USE CREDIT CARD
        224
        224
        224
        224
        224
        224
        224
        224
          0
    
    
      MURDER
          5
          5
          5
          5
          5
          5
          5
          5
          0
    
    
      OTHER
       1780
       1780
       1780
       1780
       1780
       1780
       1780
       1780
          0
    
    
      PERJURY
          1
          1
          1
          1
          1
          1
          1
          1
          1
    
    
      POSS. FORGED INSTRUMENT
        155
        155
        155
        155
        155
        155
        155
        155
        155
    
    
      PROMOTING CONTRABAND
         12
         12
         12
         12
         12
         12
         12
         12
         12
    
    
      PROMOTING PROSTITUTION
          3
          3
          3
          3
          3
          3
          3
          3
          3
    
    
      RECEIVING STOLEN PROPERTY
         97
         97
         97
         97
         97
         97
         97
         97
         97
    
    
      ROBBERY
        144
        144
        144
        144
        144
        144
        144
        144
        144
    
    
      STALKING
         12
         12
         12
         12
         12
         12
         12
         12
         12
    
    
      THEFT
       4637
       4637
       4637
       4637
       4637
       4637
       4637
       4637
       4637
    
    
      THEFT & SALE
          1
          1
          1
          1
          1
          1
          1
          1
          1
    
    
      THEFT OF LOST PROPERTY
        139
        139
        139
        139
        139
        139
        139
        139
        139
    
    
      THEFT OF OTHER VEHICLES
          1
          1
          1
          1
          1
          1
          1
          1
          1
    
    
      THEFT OF SERVICES
        113
        113
        113
        113
        113
        113
        113
        113
        113
    
    
      UNAUTHORIZED USE
        118
        118
        118
        118
        118
        118
        118
        118
          0
    
    
      WEAPON
         87
         87
         87
         87
         87
         87
         87
         87
          0
    
  

42 rows × 9 columns



In [20]:

    
def fix(string):
    string = re.sub( 'Madison$', '', string ).strip()
    string = re.sub( 'Av$', 'Avenue', string ).strip()
    string = re.sub( 'Blvd$', 'Boulevard', string).strip()
    string = re.sub( 'Ln$', 'Lane', string ).strip()
    string = re.sub( 'Dr$', 'Drive', string ).strip()
    string = re.sub( 'Ct$', 'Court', string ).strip()
    string = re.sub( 'Cir$', 'Circle', string ).strip()
    string = re.sub( 'Rd$', 'Road', string ).strip()
    string = re.sub( 'Hwy$', 'Highway', string ).strip()
    string = re.sub( '72 W$', '72 West', string ).strip()
    string = re.sub( 'St$', 'Street', string).strip()
    string = re.sub( 'Tr$', 'Trail', string).strip()
    string = re.sub( 'Pl$', 'Place', string).strip()
    return string

new_data['Address'] = pd.DataFrame(list(new_data['Address'].map(fix)))



In [21]:

    
new_data.to_csv('clean.csv')



In [14]:

    
from geopy.geocoders import GoogleV3
import time
geolocator = GoogleV3(api_key="AIzaSyCtnKL4GRaUynWx2kuXaM0eVGpRObv-ZLs")

def geocode(string):
    print string
    location = geolocator.geocode(string + " Madison, AL")
    time.sleep(0.2)
    if location is None:
        return (None, None, None, None)
    if type(location) is tuple:
        location = location[0]
    return (location.address, location.latitude, location.longitude, location.raw)



In [64]:

    
addresses = new_data.groupby('Address')
for (k,v) in addresses.groups.iteritems():
    if not gc_results.has_key(k):
        gc_results[k] = geocode(k)









    



1000 Block of Nolan Court
Media Solutions, 9632 Madison Boulevard
The Heritage Station Homeowners Assoc, 116 Bayberry Lane
1000 Block of Waters Edge Lane
Board Of Education, 211 Celtic Drive
area of Chestnut Dr / Wall Triana Highway
Lowes Home Improvement Warehouse, 7920 Highway 72 West
area of Stella Drive
100 Block of Dupont Circle
City Of Madison, 5100 Wall Triana Highway
Halsey Food Service, 401 Lanier Road
Hogan Family Ymca, 130 Park Square Lane
City Of Madison, Hughes Rd / Will Halsey Way
Madison Police Department, Shelton Rd / Summerview Drive
600 Block of Larry Drive
area of Slaughter Road
600 Block of Browns Ferry Road
100 Block of Carlisle Circle
100 Block of Cambridge Circle
100 Block of Durham Drive
State Of Alabama, Madison Blvd / Wall Triana Highway
State Of Alabama, 8716 Madison Boulevard
Cb&S Bank, 8018 Highway 72 West
200 Block of Chantilly Lane
State Of Alabama, Bridgefield Rd / Wall Triana Highway
Madison City Board Of Education, 11306 County Line Road
American Overhead Door, 9091 Madison Boulevard
100 Block of Liza Lane
City Of Madison, Wall Triana Hwy @ I 565
St John Baptist Catholic Church, 1055 Hughes Road
area of Madison Blvd Madison / Wall Triana Highway
200 Block of Carriage Hill
100 Block of Lanthorn Circle
300 Block of Martin Street
Reeds Cable, 8000 Old Madison Pike
100 Block of Carlton Woods Drive
100 Block of Emerson Road
100 Block of Foxfield Drive
200 Block of Kyser Boulevard
City Of Madison, 151 Stone Meadow Lane
City Of Madison, Pebble Brook Cir / Wall Triana Highway
Buds Convenience Store, 101 Kyser Boulevard
City Of Madison, Gillespie Rd / Wall Triana Highway
100 Block of Autumn Haven Lane
200 Block of Shelton Road
900 Block of Portal Lane
State Of Alabama, 1550 Hughes Road
area of Rocket Harley Davidson
area of Green Creek Road
100 Block of Reinhart Circle
City Of Madison, 137 Manningham Drive
300 Block of Autumn Lane
City Of Huntsville, 138 Teal Park Lane
200 Block of Waterson Way
200 Block of Park Stone Drive
City Of Madison, 8781 Madison Boulevard
The Station Pub And Grill, 8694 Madison Boulevard
800 Block of Ashwood Drive
K`S Beauty Supply, 7837 Highway 72 West
Cd`S Pub And Grill, 107 Arlington Drive
100 Block of Liverpool Drive
area of Angela Drive
City Of Madison, 118 Ashville Wood Court
Hunters Chase Apartments, 340 Gillespie Road
City Of Madison, Bowers Rd / Powell Road
Madison City Schools, 11306 County Line Road
200 Block of Moore Street
200 Block of Front Street
City Of Madison, 145 Angela Drive
Neurovizion, 3490 Wall Triana Highway
100 Block of Vinewood Lane
area of Bradford Farms Subdivision
City Of Madison, 100 Park Square Lane
100 Block of Conger Road
State Of Alabama, 244 Kyser Boulevard
100 Block of Chadrick Drive
100 Block of Kensington Drive
Rbc Centura Bank, 7930 Highway 72 West





    



---------------------------------------------------------------------------
GeocoderQuotaExceeded                     Traceback (most recent call last)
<ipython-input-64-29d5ce5bd3db> in <module>()
      2 for (k,v) in addresses.groups.iteritems():
      3     if not gc_results.has_key(k):
----> 4         gc_results[k] = geocode(k)

<ipython-input-28-fe0dbede2a25> in geocode(string)
      5 def geocode(string):
      6     print string
----> 7     location = geolocator.geocode(string + " Madison, AL")
      8     time.sleep(0.2)
      9     if location is None:

/usr/local/lib/python2.7/dist-packages/geopy/geocoders/googlev3.pyc in geocode(self, query, exactly_one, timeout, bounds, region, components, language, sensor)
    214         logger.debug("%s.geocode: %s", self.__class__.__name__, url)
    215         return self._parse_json(
--> 216             self._call_geocoder(url, timeout=timeout), exactly_one
    217         )
    218 

/usr/local/lib/python2.7/dist-packages/geopy/geocoders/googlev3.pyc in _parse_json(self, page, exactly_one)
    335         places = page.get('results', [])
    336         if not len(places):
--> 337             self._check_status(page.get('status'))
    338             return None
    339 

/usr/local/lib/python2.7/dist-packages/geopy/geocoders/googlev3.pyc in _check_status(status)
    360         if status == 'OVER_QUERY_LIMIT':
    361             raise GeocoderQuotaExceeded(
--> 362                 'The given key has gone over the requests limit in the 24'
    363                 ' hour period or has submitted too many requests in too'
    364                 ' short a period of time.'

GeocoderQuotaExceeded: The given key has gone over the requests limit in the 24 hour period or has submitted too many requests in too short a period of time.



In [22]:

    
geocode_df = pd.DataFrame(gc_results).transpose()
geocode_df.columns = ['address', 'lat', 'lon', 'raw']
geocode_df.to_csv('geocode.csv')









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-22-93778e16b17c> in <module>()
----> 1 geocode_df = pd.DataFrame(gc_results).transpose()
      2 geocode_df.columns = ['address', 'lat', 'lon', 'raw']
      3 geocode_df.to_csv('geocode.csv')

/usr/lib/python2.7/dist-packages/pandas/core/frame.pyc in __init__(self, data, index, columns, dtype, copy)
    199                                  dtype=dtype, copy=copy)
    200         elif isinstance(data, dict):
--> 201             mgr = self._init_dict(data, index, columns, dtype=dtype)
    202         elif isinstance(data, ma.MaskedArray):
    203             import numpy.ma.mrecords as mrecords

/usr/lib/python2.7/dist-packages/pandas/core/frame.pyc in _init_dict(self, data, index, columns, dtype)
    321 
    322         return _arrays_to_mgr(arrays, data_names, index, columns,
--> 323                               dtype=dtype)
    324 
    325     def _init_ndarray(self, values, index, columns, dtype=None,

/usr/lib/python2.7/dist-packages/pandas/core/frame.pyc in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
   4461     # figure out the index, if necessary
   4462     if index is None:
-> 4463         index = extract_index(arrays)
   4464     else:
   4465         index = _ensure_index(index)

/usr/lib/python2.7/dist-packages/pandas/core/frame.pyc in extract_index(data)
   4512 
   4513             if have_dicts:
-> 4514                 raise ValueError('Mixing dicts with non-Series may lead to '
   4515                                  'ambiguous ordering.')
   4516 

ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.



In [112]:

    
geocode_df = pd.DataFrame.from_csv('geocode.csv')
geocode_df.ix[ geocode_df.lat < 34.484434, ['lat', 'lon']] = nan
geocode_df.ix[ geocode_df.lat > 34.8950, ['lat', 'lon']] = nan
geocode_df.ix[ geocode_df.lon > -86.4159, ['lat', 'lon']] = nan
geocode_df.ix[ geocode_df.lon < -86.984023, ['lat', 'lon']] = nan
geocode_dict = geocode_df.to_dict('dict')



In [114]:

    
new_data['Lat'] = new_data['Address'].apply(geocode_dict['lat'].get)
new_data['Lon'] = new_data['Address'].apply(geocode_dict['lon'].get)
new_data['LatLon'] = new_data['Address'].apply(geocode_dict['lat'].get).map(str) + ", " + new_data['Address'].apply(geocode_dict['lon'].get).map(str)
new_data.to_csv('new_data_gc.csv')



In [ ]:

	Address	Case	DateTime	File	Incident	NumInc	Shift	cat	deg
cat
ADULT SEX OFFENDER	7	7	7	7	7	7	7	7	0
ANIMAL	67	67	67	67	67	67	67	67	0
ARSON	5	5	5	5	5	5	5	5	5
ASSAULT	270	270	270	270	270	270	270	270	86
ATTEMPTED BUGLARY	13	13	13	13	13	13	13	13	13
ATTEMPTED MURDER	7	7	7	7	7	7	7	7	0
AUTO THEFT	161	161	161	161	161	161	161	161	161
BREAKING AND ENTERING A VEHICLE	1502	1502	1502	1502	1502	1502	1502	1502	0
BREAKING AND ENTERING VEHICLE	2	2	2	2	2	2	2	2	2
BURGLARY	1125	1125	1125	1125	1125	1125	1125	1125	1125
CRIMINAL MISCHIEF	1158	1158	1158	1158	1158	1158	1158	1158	1158
CRIMINAL TAMPERING	40	40	40	40	40	40	40	40	40
CRIMINAL TRESPASS	173	173	173	173	173	173	173	173	173
DISCHARGING FIREARM	28	28	28	28	28	28	28	28	0
DOMESTIC VIOLENCE	1864	1864	1864	1864	1864	1864	1864	1864	1813
DRIVING UNDER INFLUENCE	28	28	28	28	28	28	28	28	0
DUTY UPON STRIKING	197	197	197	197	197	197	197	197	0
ELDER ABUSE AND NEGLECT	3	3	3	3	3	3	3	3	3
ESCAPE	10	10	10	10	10	10	10	10	10
EXTORTION	2	2	2	2	2	2	2	2	2
FINANCIAL EXPLOITATION OF ELDERLY PERSON	4	4	4	4	4	4	4	4	4
FORGERY	94	94	94	94	94	94	94	94	94
HARASSMENT	1296	1296	1296	1296	1296	1296	1296	1296	0
HINDERING PROSECUTION	7	7	7	7	7	7	7	7	7
IDENTITY THEFT	445	445	445	445	445	445	445	445	445
ILLEGAL POSS/USE CREDIT CARD	224	224	224	224	224	224	224	224	0
MURDER	5	5	5	5	5	5	5	5	0
OTHER	1780	1780	1780	1780	1780	1780	1780	1780	0
PERJURY	1	1	1	1	1	1	1	1	1
POSS. FORGED INSTRUMENT	155	155	155	155	155	155	155	155	155
PROMOTING CONTRABAND	12	12	12	12	12	12	12	12	12
PROMOTING PROSTITUTION	3	3	3	3	3	3	3	3	3
RECEIVING STOLEN PROPERTY	97	97	97	97	97	97	97	97	97
ROBBERY	144	144	144	144	144	144	144	144	144
STALKING	12	12	12	12	12	12	12	12	12
THEFT	4637	4637	4637	4637	4637	4637	4637	4637	4637
THEFT & SALE	1	1	1	1	1	1	1	1	1
THEFT OF LOST PROPERTY	139	139	139	139	139	139	139	139	139
THEFT OF OTHER VEHICLES	1	1	1	1	1	1	1	1	1
THEFT OF SERVICES	113	113	113	113	113	113	113	113	113
UNAUTHORIZED USE	118	118	118	118	118	118	118	118	0
WEAPON	87	87	87	87	87	87	87	87	0