In [2]:
import pandas as pd
import numpy as np

INITIAL READING

Read in the data.


In [4]:
# Pull in the raw NYPD collision records and peek at the first few rows.
crash_csv_path = '../crash_data/crashes.csv'
crashes = pd.read_csv(crash_csv_path)
crashes.head()


Out[4]:
DATE TIME BOROUGH ZIP CODE LATITUDE LONGITUDE LOCATION ON STREET NAME CROSS STREET NAME OFF STREET NAME ... CONTRIBUTING FACTOR VEHICLE 2 CONTRIBUTING FACTOR VEHICLE 3 CONTRIBUTING FACTOR VEHICLE 4 CONTRIBUTING FACTOR VEHICLE 5 UNIQUE KEY VEHICLE TYPE CODE 1 VEHICLE TYPE CODE 2 VEHICLE TYPE CODE 3 VEHICLE TYPE CODE 4 VEHICLE TYPE CODE 5
0 01/20/2015 18:10 MANHATTAN 10011 40.742255 -74.000704 (40.7422546, -74.0007037) 8 AVENUE WEST 18 STREET NaN ... Unspecified NaN NaN NaN 3155964 TAXI AMBULANCE NaN NaN NaN
1 01/20/2015 9:50 NaN NaN NaN NaN NaN NaN NaN NaN ... Unspecified NaN NaN NaN 3157038 PASSENGER VEHICLE MOTORCYCLE NaN NaN NaN
2 01/20/2015 9:55 BROOKLYN 11226 40.646847 -73.950126 (40.6468467, -73.9501265) TILDEN AVENUE EAST 29 STREET NaN ... Unspecified NaN NaN NaN 3156247 SPORT UTILITY / STATION WAGON PASSENGER VEHICLE NaN NaN NaN
3 01/20/2015 9:55 QUEENS 11361 40.761670 -73.775962 (40.76167, -73.7759617) 42 AVENUE 209 STREET NaN ... NaN NaN NaN NaN 3156539 PASSENGER VEHICLE NaN NaN NaN NaN
4 01/20/2015 7:02 BROOKLYN 11204 40.617853 -73.988067 (40.6178527, -73.9880665) 19 AVENUE 65 STREET NaN ... Unspecified NaN NaN NaN 3156217 SPORT UTILITY / STATION WAGON PASSENGER VEHICLE NaN NaN NaN

5 rows × 29 columns

CLEAN UP

Go through and clean the data set: use the OpenStreetMap (Nominatim) and Google Maps geocoders to fill in the lat/lon or street names for rows missing this information.


In [245]:
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import time

# CLEAN CRASH DATA SET
crashes = open('../crash_data/crashes.csv', 'r')
heading = crashes.readline().strip().split(',')  # consume the header row
w = open('../crash_data/crashes_clean.csv', 'w')
# FIX: the header was read but never written, so crashes_clean.csv had no
# column names; write it back so downstream pd.read_csv sees proper columns.
w.write(','.join(heading) + '\n')
def latlon_geocode(latlon):
    """
    Reverse-geocode a 'lat,lon' string via the OpenStreetMap Nominatim API.

    Returns the parsed JSON response: a list of candidate places, each with
    an 'address' dict (keys like 'county', 'postcode', 'road').

    Using `params=` lets requests URL-encode the query instead of gluing it
    into the URL by hand; a timeout keeps a hung request from stalling the
    whole cleaning loop.  NOTE(review): Nominatim's usage policy also asks
    for a descriptive User-Agent header -- confirm before a large run.
    """
    r = requests.get('http://nominatim.openstreetmap.org/search',
                     params={'q': latlon, 'addressdetails': 1, 'format': 'json'},
                     timeout=10)
    return r.json()

def address_geocode(address):
    """
    Forward-geocode a free-text address (e.g. 'METROPOLITAN AVENUE and
    74 AVENUE') via the Google Maps geocoding API.

    Returns the parsed JSON response ('results' list with
    'address_components' and 'geometry').

    FIX: the original concatenated the raw address into the URL; any address
    containing spaces produced an invalid/ambiguous URL.  `params=` makes
    requests URL-encode it correctly, and the timeout bounds each call.
    """
    r = requests.get('https://maps.googleapis.com/maps/api/geocode/json',
                     params={'address': address},
                     timeout=10)
    return r.json()

#go through each line and clean it
count = 0
for crash in crashes:
    time.sleep(1)
    if count > 4:
        break
    count+=1
        
    #parse the line
    line = crash.strip().split(',')
    print line[:6],
    bor = line[2]
    zip_ = line[3]
    lat = line[4]
    lon = line[5]
    st1 = line[6]
    st2 = line[7]
    st3 = line[8]

    #find missing info if needed
    try:
        if bor != '' and zip_ != '' and st1 != '' and st2 != '': # this line is okay
            print 'okay'
            pass
        elif lat != '' and lon != '': 
            print 'lat/lon geocoding'
            geo = latlon_geocode(lat+','+lon)
            bor = geo[0]['address']['county'][:geo[0]['address']['county'].index(' ')]
            zip_ = geo[0]['address']['postcode']
            st1 = geo[0]['address']['road']
        elif st1 != '' and st2 != '':
            print 'st1/st2 geocoding'
            geo = address_geocode(st1+' and '+st2)            
            bor = [ac['short_name'] for ac in geo['results'][0]['address_components'] if 'sublocality' in ac['types']][0]
            zip_ = [ac['short_name'] for ac in geo['results'][0]['address_components'] if 'postal_code' in ac['types']][0]
            lat = geo['results'][0]['geometry']['location']['lat']
            lon = geo['results'][0]['geometry']['location']['lng']
            #neigh = [ac['short_name'] ac in geo['results'][0]['address_components'] if 'neighborhood' in ac['types']][0]
        elif st3 != '':
            print 'st3 geocoding'
            geo = address_geocode(st3)
            bor = [ac['short_name'] for ac in geo['results'][0]['address_components'] if 'sublocality' in ac['types']][0]
            zip_ = [ac['short_name'] for ac in geo['results'][0]['address_components'] if 'postal_code' in ac['types']][0]
            lat = geo['results'][0]['geometry']['location']['lat']
            lon = geo['results'][0]['geometry']['location']['lng']
            #neigh = [ac['short_name'] ac in geo['results'][0]['address_components'] if 'neighborhood' in ac['types']][0]
        else:
            print "no location details!:"
    except Exception as e:
        print "exception"

    #replace line entries
    line[2] = bor.upper()
    line[3] = zip_
    line[4] = str(lat)
    line[5] = str(lon)
    line[6] = st1.upper()
    line[7] = st2.upper()
    
    w.write(','.join(line)+'\n')
    
crashes.close()
w.close()


['01/04/2015', '16:30', 'QUEENS', '11430', '40.6661512', '-73.8057432'] okay
['01/04/2015', '16:30', '', '', '40.7710377', '-73.8341189'] lat/lon geocoding
['01/04/2015', '16:30', '', '', '40.7894677', '-73.78603'] lat/lon geocoding
['01/04/2015', '16:50', 'BRONX', '10475', '40.8694288', '-73.8257368'] lat/lon geocoding
['01/04/2015', '17:00', 'MANHATTAN', '10009', '40.727846', '-73.9822272'] okay

BUILD INTERSECTION DATA SET

Add an INTERSECTION column (the concatenation of lat and lon) to use as the index, and a CRASHES column of ones so the number of crashes per intersection can be summed.


In [247]:
# add index for intersection
crashes['INTERSECTION'] = crashes['LATITUDE'].apply(str) + "_" + crashes['LONGITUDE'].apply(str)
crashes['CRASHES'] = 1
print crashes.columns
crashes.head(n=5)


Index([u'DATE', u'TIME', u'BOROUGH', u'ZIP CODE', u'LATITUDE', u'LONGITUDE', u'ON STREET NAME', u'CROSS STREET NAME', u'OFF STREET NAME', u'NUMBER OF PERSONS INJURED', u'NUMBER OF PERSONS KILLED', u'NUMBER OF PEDESTRIANS INJURED', u'NUMBER OF PEDESTRIANS KILLED', u'NUMBER OF CYCLIST INJURED', u'NUMBER OF CYCLIST KILLED', u'NUMBER OF MOTORIST INJURED', u'NUMBER OF MOTORIST KILLED', u'CONTRIBUTING FACTOR VEHICLE 1', u'CONTRIBUTING FACTOR VEHICLE 2', u'CONTRIBUTING FACTOR VEHICLE 3', u'CONTRIBUTING FACTOR VEHICLE 4', u'CONTRIBUTING FACTOR VEHICLE 5', u'UNIQUE KEY', u'VEHICLE TYPE CODE 1', u'VEHICLE TYPE CODE 2', u'VEHICLE TYPE CODE 3', u'VEHICLE TYPE CODE 4', u'VEHICLE TYPE CODE 5', u'INTERSECTION', u'CRASHES'], dtype='object')
Out[247]:
DATE TIME BOROUGH ZIP CODE LATITUDE LONGITUDE ON STREET NAME CROSS STREET NAME OFF STREET NAME NUMBER OF PERSONS INJURED ... CONTRIBUTING FACTOR VEHICLE 4 CONTRIBUTING FACTOR VEHICLE 5 UNIQUE KEY VEHICLE TYPE CODE 1 VEHICLE TYPE CODE 2 VEHICLE TYPE CODE 3 VEHICLE TYPE CODE 4 VEHICLE TYPE CODE 5 INTERSECTION CRASHES
0 01/04/2015 16:30 QUEENS 11354 40.771038 -73.834119 WHITESTONE EXPRESSWAY LINDEN PLACE NaN 0 ... NaN NaN 3147330 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN 40.7710377_-73.8341189 1
1 01/04/2015 16:30 QUEENS 11359 40.789468 -73.786030 CROSS ISLAND PARKWAY NaN NaN 0 ... NaN NaN 3147784 PASSENGER VEHICLE VAN NaN NaN NaN 40.7894677_-73.78603 1
2 01/04/2015 16:50 BRONX 10475 40.869429 -73.825737 BARTOW AVENUE NaN 2100 BARTOW AVENUE 0 ... NaN NaN 3147071 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN 40.8694288_-73.8257368 1
3 01/04/2015 17:00 MANHATTAN 10009 40.727846 -73.982227 EAST 10 STREET AVENUE A NaN 1 ... NaN NaN 3146986 PASSENGER VEHICLE BICYCLE NaN NaN NaN 40.727846_-73.9822272 1
4 01/04/2015 17:04 BRONX 10466 40.884735 -73.840441 BAYCHESTER AVENUE CRAWFORD AVENUE NaN 1 ... NaN NaN 3147083 PASSENGER VEHICLE UNKNOWN NaN NaN NaN 40.8847353_-73.8404411 1

5 rows × 30 columns


In [257]:
# GROUP CRASHES BY INTERSECTION: sum the casualty/crash counts, keep the most
# descriptive string for the name columns, and discard everything else.
gb = crashes.groupby(by='INTERSECTION', as_index=False)

def longest(col):
    """Longest value (as a string) in the group -- keeps the most descriptive
    street/borough name when rows for the same intersection disagree.
    FIX: the original lambda shadowed its own argument (`x` used at two
    nesting levels), which worked but was easy to misread."""
    return col.get(col.map(lambda v: len(str(v))).idxmax())

def mirror(col):
    """First value in the group (identical for all rows by construction,
    e.g. LATITUDE/LONGITUDE, which define the INTERSECTION key)."""
    return col.iget(0)  # old-pandas positional getter, kept for this env

agg_fun = {u'BOROUGH': longest,
           u'ZIP CODE': mirror,
           u'LATITUDE': mirror,
           u'LONGITUDE': mirror,
           u'ON STREET NAME': longest,
           u'CROSS STREET NAME': longest,
           u'OFF STREET NAME': longest,
           u'NUMBER OF PERSONS INJURED': np.sum,
           u'NUMBER OF PERSONS KILLED': np.sum,
           u'NUMBER OF PEDESTRIANS INJURED': np.sum,
           u'NUMBER OF PEDESTRIANS KILLED': np.sum,
           u'NUMBER OF CYCLIST INJURED': np.sum,
           u'NUMBER OF CYCLIST KILLED': np.sum,
           u'NUMBER OF MOTORIST INJURED': np.sum,
           u'NUMBER OF MOTORIST KILLED': np.sum,
           'CRASHES': np.sum
           }
groupby = gb.aggregate(agg_fun)  # one row per intersection

# REMOVE ANY INTERSECTIONS WITH BAD GEOCODING -- keep only points inside a
# New York City bounding box (lat is north/south, lon is east/west)
top_lat = 40.935206
bot_lat = 40.488640
right_lon = -73.699619
left_lon = -74.261982
intersections = groupby[ (groupby.LONGITUDE  > left_lon) & (groupby.LONGITUDE < right_lon) &
                                   (groupby.LATITUDE > bot_lat) & (groupby.LATITUDE < top_lat) ]
intersections.to_csv('../crash_data/intersections.csv', index=False)  # save to csv
intersections.sort('INTERSECTION').head()


Out[257]:
INTERSECTION NUMBER OF CYCLIST KILLED ON STREET NAME CRASHES NUMBER OF PERSONS KILLED NUMBER OF MOTORIST INJURED ZIP CODE LONGITUDE NUMBER OF CYCLIST INJURED NUMBER OF MOTORIST KILLED CROSS STREET NAME NUMBER OF PEDESTRIANS KILLED NUMBER OF PEDESTRIANS INJURED LATITUDE NUMBER OF PERSONS INJURED BOROUGH OFF STREET NAME
8 40.4989488_-74.2443651 0 SWINNERTON STREET 2 0 0 10307 -74.244365 0 0 BILLOP AVENUE 0 0 40.498949 0 STATEN ISLAND NaN
9 40.499842_-74.2399169 0 YETMAN AVENUE 2 0 0 10307 -74.239917 0 0 BILLOP AVENUE 0 0 40.499842 0 STATEN ISLAND NaN
10 40.5000251_-74.2390115 0 ROCKAWAY STREET 3 0 0 10307 -74.239012 0 0 BILLOP AVENUE 0 0 40.500025 0 STATEN ISLAND NaN
11 40.5002081_-74.2381024 0 LORETTO STREET 1 0 0 10307 -74.238102 0 0 BILLOP AVENUE 0 0 40.500208 0 STATEN ISLAND NaN
12 40.5004173_-74.2364851 0 SPRAGUE AVENUE 1 0 0 10307 -74.236485 0 0 SURF AVENUE 0 0 40.500417 0 STATEN ISLAND NaN

In [256]:
#Select only those that are within new york bounding box
#lat north/south, lon east/west
top_lat = 40.935206
bot_lat = 40.488640
right_lon = -73.699619
left_lon = -74.261982
intersections = pd.read_csv('../crash_data/intersections.csv')
print 'old number intersections', len(intersections)
    
print 'initial number of intersections:', len(groupby)
new_intersections = groupby[ (groupby.LONGITUDE  > left_lon) & (groupby.LONGITUDE < right_lon) &
                                   (groupby.LATITUDE > bot_lat) & (groupby.LATITUDE < top_lat) ]
print 'new intersections:', len(new_intersections)
new_intersections.sort('INTERSECTION').head()


old number intersections 39239
initial number of intersections: 41109
new intersections: 41096
Out[256]:
INTERSECTION NUMBER OF CYCLIST KILLED ON STREET NAME CRASHES NUMBER OF PERSONS KILLED NUMBER OF MOTORIST INJURED ZIP CODE LONGITUDE NUMBER OF CYCLIST INJURED NUMBER OF MOTORIST KILLED CROSS STREET NAME NUMBER OF PEDESTRIANS KILLED NUMBER OF PEDESTRIANS INJURED LATITUDE NUMBER OF PERSONS INJURED BOROUGH OFF STREET NAME
8 40.4989488_-74.2443651 0 SWINNERTON STREET 2 0 0 10307 -74.244365 0 0 BILLOP AVENUE 0 0 40.498949 0 STATEN ISLAND NaN
9 40.499842_-74.2399169 0 YETMAN AVENUE 2 0 0 10307 -74.239917 0 0 BILLOP AVENUE 0 0 40.499842 0 STATEN ISLAND NaN
10 40.5000251_-74.2390115 0 ROCKAWAY STREET 3 0 0 10307 -74.239012 0 0 BILLOP AVENUE 0 0 40.500025 0 STATEN ISLAND NaN
11 40.5002081_-74.2381024 0 LORETTO STREET 1 0 0 10307 -74.238102 0 0 BILLOP AVENUE 0 0 40.500208 0 STATEN ISLAND NaN
12 40.5004173_-74.2364851 0 SPRAGUE AVENUE 1 0 0 10307 -74.236485 0 0 SURF AVENUE 0 0 40.500417 0 STATEN ISLAND NaN

In [ ]:
#  ADD YELP COLUMN TO INTERSECTIONS
import pandas as pd
import numpy as np
import time
import oauth2
import requests

def number_businesses(intersection):
    """
    Queries yelp api for number of businesses
    around a given lat and lon.
    """
    split = intersection.split('_')
    if len(split) < 2:
        return 0
    lat, lon = map(float, split)
    #radius = 80  # approx 1/2 block in meters

    # OAuth credentials from Yelp
    CONSUMER_KEY = 'T-urieHnNUVui5RcKb1XyQ'
    CONSUMER_SECRET = '7rpMeGcyngvXcmXbNuMlGq1VqwU'
    TOKEN = 'byRjsqCx1h7lHuu0xAq32L8WB2pEWQHR'
    TOKEN_SECRET = '43aSvtErO0nH7YO5Rmg8XSWYuvU'

    #build the request
    url = 'http://api.yelp.com/v2/search?term=FOOD&ll={0},{1}&radius_filter={2}'.format(lat, lon, radius)
    consumer = oauth2.Consumer(CONSUMER_KEY, CONSUMER_SECRET)
    oauth_request = oauth2.Request(method="GET", url=url, parameters={})
    oauth_request.update(
        {
            'oauth_nonce': oauth2.generate_nonce(),
            'oauth_timestamp': oauth2.generate_timestamp(),
            'oauth_token': TOKEN,
            'oauth_consumer_key': CONSUMER_KEY
        }
    )
    token = oauth2.Token(TOKEN, TOKEN_SECRET)
    oauth_request.sign_request(oauth2.SignatureMethod_HMAC_SHA1(), consumer, token)
    signed_url = oauth_request.to_url()

    try:
        r = requests.get(signed_url)
        #print r.json()
        time.sleep(1.0)
        print lat, lon, len(r.json()['businesses'])
        return len(r.json()[u'businesses'])
    except Exception as e:
        print lat, lon, "ERROR!", e
        return -1

# Annotate every intersection with its Yelp FOOD business count and save.
intersections = pd.read_csv('crash_data/intersections.csv')
radius = 80  # meters, roughly half a city block
yelp_column = 'YELP' + str(radius)
intersections[yelp_column] = intersections["INTERSECTION"].apply(number_businesses)
intersections.to_csv('crash_data/intersections.csv', index=False)

In [17]:
# JOIN THE YELP COLUMNS BACK ONTO THE MAIN INTERSECTION TABLE
# FIX: os.popen('ls ...') shells out and is non-portable; glob is the
# stdlib idiom.  sorted() reproduces ls's alphabetical ordering.
import glob
files = sorted(glob.glob('../*.csv'))

intersections = pd.read_csv('../crash_data/intersections.csv')
for fn in files:
    yelp_intersections = pd.read_csv(fn)
    new_column = yelp_intersections.columns[-1]  # each file adds one YELP_* column
    intersections = intersections.merge(yelp_intersections[["INTERSECTION", new_column]], on="INTERSECTION")
#intersections.drop("YELP80", axis=1, inplace=True)
intersections.to_csv('../crash_data/intersections.csv', index=False)
intersections.head()


Out[17]:
INTERSECTION NUMBER OF CYCLIST KILLED ON STREET NAME CRASHES NUMBER OF PERSONS KILLED NUMBER OF MOTORIST INJURED ZIP CODE LONGITUDE NUMBER OF CYCLIST INJURED NUMBER OF MOTORIST KILLED ... LATITUDE NUMBER OF PERSONS INJURED BOROUGH OFF STREET NAME SPEED HUMP YELP_BAR_80 YELP_DRINK_80 YELP_FOOD_80 YELP_MUSIC_80 YELP_SHOW_80
0 40.6687978_-73.9311201 0 EASTERN PARKWAY 196 0 39 11213 -73.931120 5 0 ... 40.668798 79 BROOKLYN NaN False 0 1 6 0 0
1 40.8133899_-73.9562587 0 AMSTERDAM AVENUE 175 0 12 10027 -73.956259 3 0 ... 40.813390 47 MANHATTAN NaN False 3 3 11 1 2
2 40.7572323_-73.9897922 0 WEST 42 STREET 324 0 10 10036 -73.989792 4 0 ... 40.757232 44 MANHATTAN NaN False 16 19 20 11 14
3 40.7589746_-73.9189996 0 STEINWAY STREET 93 0 9 11103 -73.919000 1 0 ... 40.758975 35 QUEENS NaN False 3 6 11 3 3
4 40.820189_-73.8906752 0 BRUCKNER BOULEVARD 227 0 42 10459 -73.890675 2 0 ... 40.820189 69 BRONX NaN False 0 0 1 0 0

5 rows × 23 columns


In [ ]:
# ADD FOUR SQUARE COLUMNS
import pandas as pd
import numpy as np
import time
import requests

# READ IN INTERSECTION DATA
intersections = pd.read_csv('crash_data/intersections.csv')

def number_businesses(intersection):
    """
    Queries four square api for number of businesses
    around a given lat and lon.
    """
    split = intersection.split('_')
    if len(split) < 2:
        return 0
    lat, lon = map(float, split)
    #radius = 80  # approx 1/2 block in meters

    # creds from four square
    client_id=""
    client_secret=""
    
    #build the request
    url ='https://api.foursquare.com/v2/venues/search?limit=50&client_id={0}&client_secret={1}&ll={2},{3}&radius={4}&query={5}'
    url = url.format(client_id, client_secret, lat, lon, radius, term)    

    try:
        time.sleep(0.5)
        r = requests.get(url)
        checkins = sum( [v['stats']['checkinsCount'] for v in j['response']['venues']] )
        #print r.json()
        print lat, lon, checkins
        return checkins
    except Exception as e:
        print lat, lon, "ERROR!", e, "|", url, "|", r.json()
        return -1

# Annotate every intersection with its Foursquare check-in total and save
# to a term/radius-specific csv.
radius = 80  # meters
term = 'FOOD'
foursq_column = '4SQ_' + term + '_' + str(radius)
intersections[foursq_column] = intersections["INTERSECTION"].apply(number_businesses)
intersections.to_csv('intersections_4SQ_' + term + str(radius) + '.csv', index=False)

In [10]:
# JOIN THE 4SQ COLUMN
import os
# NOTE(review): shelling out to `ls` is non-portable, and [:-2] drops both the
# trailing empty string AND the last csv file -- the Yelp join cell uses [:-1].
# Confirm the last file is intentionally skipped here.
files = os.popen('ls ../*.csv').read().split('\n')[:-2]

intersections = pd.read_csv('../crash_data/intersections.csv')
for fn in files:
    foursq_intersections = pd.read_csv(fn)
    # each merged file contributes its last column (the 4SQ_* count)
    new_column = foursq_intersections.columns[-1]
    intersections = intersections.merge(foursq_intersections[["INTERSECTION", new_column]], on="INTERSECTION")
# NOTE(review): dropping INTERSECTION and then overwriting intersections.csv
# destroys the join/sort key that later cells (speed hump, final check) read
# from this same file -- verify this is the intended final export.
intersections.drop("INTERSECTION", inplace=True, axis=1)
#intersections.drop("YELP80", axis=1, inplace=True)
intersections.to_csv('../crash_data/intersections.csv', index=False)
intersections.head()


Out[10]:
NUMBER OF CYCLIST KILLED ON STREET NAME CRASHES NUMBER OF PERSONS KILLED NUMBER OF MOTORIST INJURED ZIP CODE LONGITUDE NUMBER OF CYCLIST INJURED NUMBER OF MOTORIST KILLED CROSS STREET NAME ... NUMBER OF PERSONS INJURED BOROUGH OFF STREET NAME YELP_BAR_80 YELP_DRINK_80 YELP_FOOD_80 YELP_MUSIC_80 YELP_SHOW_80 4SQ_DRINK_80 4SQ_FOOD_80
0 0 FLATBUSH AVENUE EXTENSION 421 1 52 11201 -73.984529 3 1 FLATBUSH AVENUE EXTENSION ... 62 BROOKLYN NaN 0 0 0 0 0 0 0
1 0 WEST 42 STREET 324 0 10 10036 -73.989792 4 0 WEST 42 STREET ... 44 MANHATTAN NaN 16 19 20 11 14 0 278
2 0 QUEENSBORO BRIDGE APPROACH 322 0 24 10022 -73.964314 3 0 EAST 59 STREET ... 36 MANHATTAN NaN 2 1 2 3 4 0 0
3 0 PENNSYLVANIA AVENUE 303 0 114 11207 -73.890623 3 0 PENNSYLVANIA AVENUE ... 123 BROOKLYN NaN 0 1 2 0 1 0 0
4 0 PENNSYLVANIA AVENUE 289 0 82 11207 -73.896853 1 0 PENNSYLVANIA AVENUE ... 91 BROOKLYN NaN 0 0 0 1 1 0 0

5 rows × 23 columns


In [25]:
# RENAME COLUMNS: drop the radius suffix and spell out the data sources.
old_names = ['YELP_BAR_80', 'YELP_DRINK_80', 'YELP_FOOD_80', 'YELP_MUSIC_80', 'YELP_SHOW_80', '4SQ_DRINK_80', '4SQ_FOOD_80']
new_names = ['YELP_BAR', 'YELP_DRINK', 'YELP_FOOD', 'YELP_MUSIC', 'YELP_SHOW', 'FOUR_SQUARE_DRINK', 'FOUR_SQUARE_FOOD']

# copy each old column to its new name (appended at the end of the frame),
# then drop the originals -- same effect as a block assignment + drop
for old_col, new_col in zip(old_names, new_names):
    intersections[new_col] = intersections[old_col]
intersections.drop(old_names, axis=1, inplace=True)
intersections.to_csv('../crash_data/intersections.csv', index=False)
intersections.head()


Out[25]:
<matplotlib.axes.AxesSubplot at 0x7f821bb6aed0>

In [274]:
# ADD SPEED HUMP COLUMN TO INTERSECTIONS
# NOTE(review): `humps` is not defined anywhere in this notebook -- it was
# presumably a speed-hump DataFrame loaded in a cell that has since been
# deleted.  On a fresh Restart-and-Run-All this cell raises NameError;
# re-add the loading cell.
def speed_hump(st):
    """
    Go through humps file and check if intersections
    has hump.

    `st` is a row holding "ON STREET NAME" and "CROSS STREET NAME"; returns
    True if the street pair matches a record in `humps` (FIRST_Main /
    FIRST_ToSt) in either order.
    """
    return not humps[(st["CROSS STREET NAME"] == humps.FIRST_Main) & (st["ON STREET NAME"] == humps.FIRST_ToSt)].empty or \
           not humps[(st["ON STREET NAME"] == humps.FIRST_Main) & (st["CROSS STREET NAME"] == humps.FIRST_ToSt)].empty
  
intersections = pd.read_csv('crash_data/intersections.csv')
# row-wise apply over the two street-name columns
intersections['SPEED HUMP'] = intersections[["ON STREET NAME", "CROSS STREET NAME"]].apply(speed_hump, axis=1)
#intersections[['ON STREET NAME','CROSS STREET NAME','SPEED HUMP']].head(n=25)
intersections.to_csv('crash_data/intersections.csv', index=False)

In [18]:
# READ IN INTERSECTIONS TO CHECK
intersections = pd.read_csv('../crash_data/intersections.csv')
intersections.sort("INTERSECTION",  inplace=True, ascending=False,)
print len(intersections)
intersections.head(n=10)


39224
Out[18]:
INTERSECTION NUMBER OF CYCLIST KILLED ON STREET NAME CRASHES NUMBER OF PERSONS KILLED NUMBER OF MOTORIST INJURED ZIP CODE LONGITUDE NUMBER OF CYCLIST INJURED NUMBER OF MOTORIST KILLED ... LATITUDE NUMBER OF PERSONS INJURED BOROUGH OFF STREET NAME SPEED HUMP YELP_BAR_80 YELP_DRINK_80 YELP_FOOD_80 YELP_MUSIC_80 YELP_SHOW_80
39223 40.9128276_-73.9025057 0 RIVERDALE AVENUE 1 0 0 10471 -73.902506 0 0 ... 40.912828 0 BRONX 6301 RIVERDALE AVE False 0 0 0 0 0
29105 40.9124681_-73.9025894 0 RIVERDALE AVENUE 17 0 1 10471 -73.902589 0 0 ... 40.912468 1 BRONX NaN False 0 0 0 0 0
16347 40.9122231_-73.9016744 0 DELAFIELD AVENUE 2 0 0 10471 -73.901674 0 0 ... 40.912223 0 BRONX NaN False 0 0 0 0 0
16346 40.9120796_-73.9007846 0 WEST 263 STREET 2 0 1 10471 -73.900785 0 0 ... 40.912080 1 BRONX NaN False 0 0 0 0 0
16345 40.9120185_-73.8999924 0 WEST 263 STREET 2 0 0 10471 -73.899992 0 0 ... 40.912019 0 BRONX NaN False 0 0 0 0 0
22565 40.9114172_-73.8966322 0 BROADWAY 1 0 0 10471 -73.896632 0 0 ... 40.911417 0 BRONX NaN False 2 2 3 1 0
5120 40.9113269_-73.90656 0 PALISADES AVENUE 8 0 0 10471 -73.906560 0 0 ... 40.911327 2 BRONX NaN False 0 0 0 0 0
16344 40.9112391_-73.9000876 0 WEST 262 STREET 2 0 0 10471 -73.900088 0 0 ... 40.911239 0 BRONX NaN False 0 0 0 0 0
16343 40.9110435_-73.8992269 0 WEST 262 STREET 2 0 2 10471 -73.899227 0 0 ... 40.911043 2 BRONX NaN False 0 0 0 0 0
16342 40.9109115_-73.905439 0 INDEPENDENCE AVENUE 2 0 0 10471 -73.905439 0 0 ... 40.910911 0 BRONX NaN False 0 0 0 0 0

10 rows × 23 columns