In [2]:
import pandas as pd
import numpy as np

INITIAL READING

This is SUPPOSED to be the ipython notebook for constructing the clean intersections data set. The original was accidentally deleted so I need to rewrite it.


In [246]:
crashes = pd.read_csv('../crash_data/crashes.csv', names=['DATE', 'TIME', 'BOROUGH', 'ZIP CODE', 'LATITUDE', 'LONGITUDE', 
                                                       'ON STREET NAME', 'CROSS STREET NAME', 'OFF STREET NAME',
                                                       'NUMBER OF PERSONS INJURED', 'NUMBER OF PERSONS KILLED',
                                                       'NUMBER OF PEDESTRIANS INJURED', 'NUMBER OF PEDESTRIANS KILLED',
                                                       'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED',
                                                       'NUMBER OF MOTORIST INJURED', 'NUMBER OF MOTORIST KILLED',
                                                       'CONTRIBUTING FACTOR VEHICLE 1', 'CONTRIBUTING FACTOR VEHICLE 2',
                                                       'CONTRIBUTING FACTOR VEHICLE 3', 'CONTRIBUTING FACTOR VEHICLE 4', 
                                                       'CONTRIBUTING FACTOR VEHICLE 5', 'UNIQUE KEY', 'VEHICLE TYPE CODE 1',
                                                       'VEHICLE TYPE CODE 2', 'VEHICLE TYPE CODE 3', 'VEHICLE TYPE CODE 4',
                                                       'VEHICLE TYPE CODE 5'])
crashes.head()


Out[246]:
DATE TIME BOROUGH ZIP CODE LATITUDE LONGITUDE ON STREET NAME CROSS STREET NAME OFF STREET NAME NUMBER OF PERSONS INJURED ... CONTRIBUTING FACTOR VEHICLE 2 CONTRIBUTING FACTOR VEHICLE 3 CONTRIBUTING FACTOR VEHICLE 4 CONTRIBUTING FACTOR VEHICLE 5 UNIQUE KEY VEHICLE TYPE CODE 1 VEHICLE TYPE CODE 2 VEHICLE TYPE CODE 3 VEHICLE TYPE CODE 4 VEHICLE TYPE CODE 5
0 01/04/2015 16:30 QUEENS 11354 40.771038 -73.834119 WHITESTONE EXPRESSWAY LINDEN PLACE NaN 0 ... Unspecified NaN NaN NaN 3147330 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
1 01/04/2015 16:30 QUEENS 11359 40.789468 -73.786030 CROSS ISLAND PARKWAY NaN NaN 0 ... Unspecified NaN NaN NaN 3147784 PASSENGER VEHICLE VAN NaN NaN NaN
2 01/04/2015 16:50 BRONX 10475 40.869429 -73.825737 BARTOW AVENUE NaN 2100 BARTOW AVENUE 0 ... Unspecified NaN NaN NaN 3147071 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
3 01/04/2015 17:00 MANHATTAN 10009 40.727846 -73.982227 EAST 10 STREET AVENUE A NaN 1 ... Unspecified NaN NaN NaN 3146986 PASSENGER VEHICLE BICYCLE NaN NaN NaN
4 01/04/2015 17:04 BRONX 10466 40.884735 -73.840441 BAYCHESTER AVENUE CRAWFORD AVENUE NaN 1 ... Unspecified NaN NaN NaN 3147083 PASSENGER VEHICLE UNKNOWN NaN NaN NaN

5 rows × 28 columns

CLEAN UP

Go through and clean the data set: Use the open street mapper / google maps geocoder to find the lat/lon or street name for rows missing this information.


In [245]:
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import time

# CLEAN CRASH DATA SET
crashes = open('../crash_data/crashes.csv', 'r')
heading = crashes.readline().strip().split(',')  # skip the first line
w = open('../crash_data/crashes_clean.csv', 'w')
 
    
# Example: METROPOLITAN AVENUE and 74 AVENUE
def latlon_geocode(latlon):
    r = requests.get('http://nominatim.openstreetmap.org/search?q='+latlon+'&addressdetails=1&format=json')
    return r.json()

def address_geocode(address):
    r = requests.get('https://maps.googleapis.com/maps/api/geocode/json?address='+address)
    return r.json()

#go through each line and clean it
count = 0
for crash in crashes:
    time.sleep(1)
    if count > 4:
        break
    count+=1
        
    #parse the line
    line = crash.strip().split(',')
    print line[:6],
    bor = line[2]
    zip_ = line[3]
    lat = line[4]
    lon = line[5]
    st1 = line[6]
    st2 = line[7]
    st3 = line[8]

    #find missing info if needed
    try:
        if bor != '' and zip_ != '' and st1 != '' and st2 != '': # this line is okay
            print 'okay'
            pass
        elif lat != '' and lon != '': 
            print 'lat/lon geocoding'
            geo = latlon_geocode(lat+','+lon)
            bor = geo[0]['address']['county'][:geo[0]['address']['county'].index(' ')]
            zip_ = geo[0]['address']['postcode']
            st1 = geo[0]['address']['road']
        elif st1 != '' and st2 != '':
            print 'st1/st2 geocoding'
            geo = address_geocode(st1+' and '+st2)            
            bor = [ac['short_name'] for ac in geo['results'][0]['address_components'] if 'sublocality' in ac['types']][0]
            zip_ = [ac['short_name'] for ac in geo['results'][0]['address_components'] if 'postal_code' in ac['types']][0]
            lat = geo['results'][0]['geometry']['location']['lat']
            lon = geo['results'][0]['geometry']['location']['lng']
            #neigh = [ac['short_name'] ac in geo['results'][0]['address_components'] if 'neighborhood' in ac['types']][0]
        elif st3 != '':
            print 'st3 geocoding'
            geo = address_geocode(st3)
            bor = [ac['short_name'] for ac in geo['results'][0]['address_components'] if 'sublocality' in ac['types']][0]
            zip_ = [ac['short_name'] for ac in geo['results'][0]['address_components'] if 'postal_code' in ac['types']][0]
            lat = geo['results'][0]['geometry']['location']['lat']
            lon = geo['results'][0]['geometry']['location']['lng']
            #neigh = [ac['short_name'] ac in geo['results'][0]['address_components'] if 'neighborhood' in ac['types']][0]
        else:
            print "no location details!:"
    except Exception as e:
        print "exception"

    #replace line entries
    line[2] = bor.upper()
    line[3] = zip_
    line[4] = str(lat)
    line[5] = str(lon)
    line[6] = st1.upper()
    line[7] = st2.upper()
    
    w.write(','.join(line)+'\n')
    
crashes.close()
w.close()


['01/04/2015', '16:30', 'QUEENS', '11430', '40.6661512', '-73.8057432'] okay
['01/04/2015', '16:30', '', '', '40.7710377', '-73.8341189'] lat/lon geocoding
['01/04/2015', '16:30', '', '', '40.7894677', '-73.78603'] lat/lon geocoding
['01/04/2015', '16:50', 'BRONX', '10475', '40.8694288', '-73.8257368'] lat/lon geocoding
['01/04/2015', '17:00', 'MANHATTAN', '10009', '40.727846', '-73.9822272'] okay

BUILD INTERSECTION DATA SET

Add an intersection name that is the concatenation of lat and lon to use as the index. Add crash column for summing the number of crashes.


In [247]:
# add index for intersection
crashes['INTERSECTION'] = crashes['LATITUDE'].apply(str) + "_" + crashes['LONGITUDE'].apply(str)
crashes['CRASHES'] = 1
print crashes.columns
crashes.head(n=5)


Index([u'DATE', u'TIME', u'BOROUGH', u'ZIP CODE', u'LATITUDE', u'LONGITUDE', u'ON STREET NAME', u'CROSS STREET NAME', u'OFF STREET NAME', u'NUMBER OF PERSONS INJURED', u'NUMBER OF PERSONS KILLED', u'NUMBER OF PEDESTRIANS INJURED', u'NUMBER OF PEDESTRIANS KILLED', u'NUMBER OF CYCLIST INJURED', u'NUMBER OF CYCLIST KILLED', u'NUMBER OF MOTORIST INJURED', u'NUMBER OF MOTORIST KILLED', u'CONTRIBUTING FACTOR VEHICLE 1', u'CONTRIBUTING FACTOR VEHICLE 2', u'CONTRIBUTING FACTOR VEHICLE 3', u'CONTRIBUTING FACTOR VEHICLE 4', u'CONTRIBUTING FACTOR VEHICLE 5', u'UNIQUE KEY', u'VEHICLE TYPE CODE 1', u'VEHICLE TYPE CODE 2', u'VEHICLE TYPE CODE 3', u'VEHICLE TYPE CODE 4', u'VEHICLE TYPE CODE 5', u'INTERSECTION', u'CRASHES'], dtype='object')
Out[247]:
DATE TIME BOROUGH ZIP CODE LATITUDE LONGITUDE ON STREET NAME CROSS STREET NAME OFF STREET NAME NUMBER OF PERSONS INJURED ... CONTRIBUTING FACTOR VEHICLE 4 CONTRIBUTING FACTOR VEHICLE 5 UNIQUE KEY VEHICLE TYPE CODE 1 VEHICLE TYPE CODE 2 VEHICLE TYPE CODE 3 VEHICLE TYPE CODE 4 VEHICLE TYPE CODE 5 INTERSECTION CRASHES
0 01/04/2015 16:30 QUEENS 11354 40.771038 -73.834119 WHITESTONE EXPRESSWAY LINDEN PLACE NaN 0 ... NaN NaN 3147330 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN 40.7710377_-73.8341189 1
1 01/04/2015 16:30 QUEENS 11359 40.789468 -73.786030 CROSS ISLAND PARKWAY NaN NaN 0 ... NaN NaN 3147784 PASSENGER VEHICLE VAN NaN NaN NaN 40.7894677_-73.78603 1
2 01/04/2015 16:50 BRONX 10475 40.869429 -73.825737 BARTOW AVENUE NaN 2100 BARTOW AVENUE 0 ... NaN NaN 3147071 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN 40.8694288_-73.8257368 1
3 01/04/2015 17:00 MANHATTAN 10009 40.727846 -73.982227 EAST 10 STREET AVENUE A NaN 1 ... NaN NaN 3146986 PASSENGER VEHICLE BICYCLE NaN NaN NaN 40.727846_-73.9822272 1
4 01/04/2015 17:04 BRONX 10466 40.884735 -73.840441 BAYCHESTER AVENUE CRAWFORD AVENUE NaN 1 ... NaN NaN 3147083 PASSENGER VEHICLE UNKNOWN NaN NaN NaN 40.8847353_-73.8404411 1

5 rows × 30 columns


In [257]:
# GROUP CRASHES BY INTERSECTION AGGREGATING CRASHES THROWING AWAY OTHER
gb = crashes.groupby(by='INTERSECTION', as_index=False)

longest = lambda x: x.get(x.map(lambda x: len(str(x))).idxmax())  # longest string
mirror = lambda x: x.iget(0)
agg_fun = {u'BOROUGH':longest,
           u'ZIP CODE':mirror,
           u'LATITUDE':mirror,
           u'LONGITUDE':mirror,
           u'ON STREET NAME':longest,
           u'CROSS STREET NAME':longest,
           u'OFF STREET NAME':longest,
           u'NUMBER OF PERSONS INJURED':np.sum,
           u'NUMBER OF PERSONS KILLED':np.sum,
           u'NUMBER OF PEDESTRIANS INJURED':np.sum,
           u'NUMBER OF PEDESTRIANS KILLED':np.sum,
           u'NUMBER OF CYCLIST INJURED':np.sum,
           u'NUMBER OF CYCLIST KILLED':np.sum,
           u'NUMBER OF MOTORIST INJURED':np.sum,
           u'NUMBER OF MOTORIST KILLED':np.sum,
           'CRASHES':np.sum
           }
groupby = gb.aggregate(agg_fun)  # aggregate

#REMOVE ANY INTERSECTIONS WITH BAD GEOCODING -- lat north/south, lon east/west
#define a bounding box
top_lat = 40.935206
bot_lat = 40.488640
right_lon = -73.699619
left_lon = -74.261982
intersections = groupby[ (groupby.LONGITUDE  > left_lon) & (groupby.LONGITUDE < right_lon) &
                                   (groupby.LATITUDE > bot_lat) & (groupby.LATITUDE < top_lat) ]
intersections.to_csv('../crash_data/intersections.csv', index=False)  # save to csv
intersections.sort('INTERSECTION').head()


Out[257]:
INTERSECTION NUMBER OF CYCLIST KILLED ON STREET NAME CRASHES NUMBER OF PERSONS KILLED NUMBER OF MOTORIST INJURED ZIP CODE LONGITUDE NUMBER OF CYCLIST INJURED NUMBER OF MOTORIST KILLED CROSS STREET NAME NUMBER OF PEDESTRIANS KILLED NUMBER OF PEDESTRIANS INJURED LATITUDE NUMBER OF PERSONS INJURED BOROUGH OFF STREET NAME
8 40.4989488_-74.2443651 0 SWINNERTON STREET 2 0 0 10307 -74.244365 0 0 BILLOP AVENUE 0 0 40.498949 0 STATEN ISLAND NaN
9 40.499842_-74.2399169 0 YETMAN AVENUE 2 0 0 10307 -74.239917 0 0 BILLOP AVENUE 0 0 40.499842 0 STATEN ISLAND NaN
10 40.5000251_-74.2390115 0 ROCKAWAY STREET 3 0 0 10307 -74.239012 0 0 BILLOP AVENUE 0 0 40.500025 0 STATEN ISLAND NaN
11 40.5002081_-74.2381024 0 LORETTO STREET 1 0 0 10307 -74.238102 0 0 BILLOP AVENUE 0 0 40.500208 0 STATEN ISLAND NaN
12 40.5004173_-74.2364851 0 SPRAGUE AVENUE 1 0 0 10307 -74.236485 0 0 SURF AVENUE 0 0 40.500417 0 STATEN ISLAND NaN

In [256]:
#Select only those that are within new york bounding box
#lat north/south, lon east/west
top_lat = 40.935206
bot_lat = 40.488640
right_lon = -73.699619
left_lon = -74.261982
intersections = pd.read_csv('../crash_data/intersections.csv')
print 'old number intersections', len(intersections)
    
print 'initial number of intersections:', len(groupby)
new_intersections = groupby[ (groupby.LONGITUDE  > left_lon) & (groupby.LONGITUDE < right_lon) &
                                   (groupby.LATITUDE > bot_lat) & (groupby.LATITUDE < top_lat) ]
print 'new intersections:', len(new_intersections)
new_intersections.sort('INTERSECTION').head()


old number intersections 39239
initial number of intersections: 41109
new intersections: 41096
Out[256]:
INTERSECTION NUMBER OF CYCLIST KILLED ON STREET NAME CRASHES NUMBER OF PERSONS KILLED NUMBER OF MOTORIST INJURED ZIP CODE LONGITUDE NUMBER OF CYCLIST INJURED NUMBER OF MOTORIST KILLED CROSS STREET NAME NUMBER OF PEDESTRIANS KILLED NUMBER OF PEDESTRIANS INJURED LATITUDE NUMBER OF PERSONS INJURED BOROUGH OFF STREET NAME
8 40.4989488_-74.2443651 0 SWINNERTON STREET 2 0 0 10307 -74.244365 0 0 BILLOP AVENUE 0 0 40.498949 0 STATEN ISLAND NaN
9 40.499842_-74.2399169 0 YETMAN AVENUE 2 0 0 10307 -74.239917 0 0 BILLOP AVENUE 0 0 40.499842 0 STATEN ISLAND NaN
10 40.5000251_-74.2390115 0 ROCKAWAY STREET 3 0 0 10307 -74.239012 0 0 BILLOP AVENUE 0 0 40.500025 0 STATEN ISLAND NaN
11 40.5002081_-74.2381024 0 LORETTO STREET 1 0 0 10307 -74.238102 0 0 BILLOP AVENUE 0 0 40.500208 0 STATEN ISLAND NaN
12 40.5004173_-74.2364851 0 SPRAGUE AVENUE 1 0 0 10307 -74.236485 0 0 SURF AVENUE 0 0 40.500417 0 STATEN ISLAND NaN

In [93]:
#!!! THIS PART HAS BEEN MADE OBSOLUTE!!!

# CREATE COUNTS FILE SAVE AND THEN MERGE
#counts = pd.DataFrame(gb.size()) 
#counts.to_csv('groupby_counts_junk.csv')  # save to save csv

#read csvs and merge
#groupby = pd.read_csv('groupby_agg_junk.csv')
#counts = pd.read_csv('groupby_counts_junk.csv', names=["INTERSECTION", "CRASHES"])

#merge together and clean up
#df = groupby.merge(counts, on="INTERSECTION")
#df.drop(df.columns[:1], axis=1, inplace=True) # remove indexing column
#df = df[df["INTERSECTION"]!="nan_nan"]  # eliminating all intersections with lat/lon ~80k crashes
#df.rename(columns={u"0":"CRASHES"}, inplace=True)  # rename "0" to "CRASHES"
#df.to_csv('crash_data/intersections.csv', index=False)

In [ ]:
#  ADD YELP COLUMN TO INTERSECTIONS
import pandas as pd
import numpy as np
import time
import oauth2
import requests

def number_businesses(intersection):
    """
    Queries yelp api for number of businesses
    around a given lat and lon.
    """
    split = intersection.split('_')
    if len(split) < 2:
        return 0
    lat, lon = map(float, split)
    #radius = 80  # approx 1/2 block in meters

    # OAuth credentials from Yelp
    CONSUMER_KEY = 'T-urieHnNUVui5RcKb1XyQ'
    CONSUMER_SECRET = '7rpMeGcyngvXcmXbNuMlGq1VqwU'
    TOKEN = 'byRjsqCx1h7lHuu0xAq32L8WB2pEWQHR'
    TOKEN_SECRET = '43aSvtErO0nH7YO5Rmg8XSWYuvU'

    #build the request
    url = 'http://api.yelp.com/v2/search?term=FOOD&ll={0},{1}&radius_filter={2}'.format(lat, lon, radius)
    consumer = oauth2.Consumer(CONSUMER_KEY, CONSUMER_SECRET)
    oauth_request = oauth2.Request(method="GET", url=url, parameters={})
    oauth_request.update(
        {
            'oauth_nonce': oauth2.generate_nonce(),
            'oauth_timestamp': oauth2.generate_timestamp(),
            'oauth_token': TOKEN,
            'oauth_consumer_key': CONSUMER_KEY
        }
    )
    token = oauth2.Token(TOKEN, TOKEN_SECRET)
    oauth_request.sign_request(oauth2.SignatureMethod_HMAC_SHA1(), consumer, token)
    signed_url = oauth_request.to_url()

    try:
        r = requests.get(signed_url)
        #print r.json()
        time.sleep(1.0)
        print lat, lon, len(r.json()['businesses'])
        return len(r.json()[u'businesses'])
    except Exception as e:
        print lat, lon, "ERROR!", e
        return -1

intersections = pd.read_csv('crash_data/intersections.csv')
radius = 80
intersections['YELP'+str(radius)] = intersections["INTERSECTION"].apply(number_businesses)
#intersections.head(n=10)
intersections.to_csv('crash_data/intersections.csv', index=False)

In [17]:
# JOIN THE YELP COLUMN
import os
files = os.popen('ls ../*.csv').read().split('\n')[:-1]

intersections = pd.read_csv('../crash_data/intersections.csv')
for fn in files:
    yelp_intersections = pd.read_csv(fn)
    new_column = yelp_intersections.columns[-1]
    intersections = intersections.merge(yelp_intersections[["INTERSECTION", new_column]], on="INTERSECTION")
#intersections.drop("YELP80", axis=1, inplace=True)
intersections.to_csv('../crash_data/intersections.csv', index=False)
intersections.head()


Out[17]:
INTERSECTION NUMBER OF CYCLIST KILLED ON STREET NAME CRASHES NUMBER OF PERSONS KILLED NUMBER OF MOTORIST INJURED ZIP CODE LONGITUDE NUMBER OF CYCLIST INJURED NUMBER OF MOTORIST KILLED ... LATITUDE NUMBER OF PERSONS INJURED BOROUGH OFF STREET NAME SPEED HUMP YELP_BAR_80 YELP_DRINK_80 YELP_FOOD_80 YELP_MUSIC_80 YELP_SHOW_80
0 40.6687978_-73.9311201 0 EASTERN PARKWAY 196 0 39 11213 -73.931120 5 0 ... 40.668798 79 BROOKLYN NaN False 0 1 6 0 0
1 40.8133899_-73.9562587 0 AMSTERDAM AVENUE 175 0 12 10027 -73.956259 3 0 ... 40.813390 47 MANHATTAN NaN False 3 3 11 1 2
2 40.7572323_-73.9897922 0 WEST 42 STREET 324 0 10 10036 -73.989792 4 0 ... 40.757232 44 MANHATTAN NaN False 16 19 20 11 14
3 40.7589746_-73.9189996 0 STEINWAY STREET 93 0 9 11103 -73.919000 1 0 ... 40.758975 35 QUEENS NaN False 3 6 11 3 3
4 40.820189_-73.8906752 0 BRUCKNER BOULEVARD 227 0 42 10459 -73.890675 2 0 ... 40.820189 69 BRONX NaN False 0 0 1 0 0

5 rows × 23 columns


In [ ]:
# ADD FOUR SQUARE COLUMNS
import pandas as pd
import numpy as np
import time
import requests

# READ IN INTERSECTION DATA
intersections = pd.read_csv('crash_data/intersections.csv')

def number_businesses(intersection):
    """
    Queries four square api for number of businesses
    around a given lat and lon.
    """
    split = intersection.split('_')
    if len(split) < 2:
        return 0
    lat, lon = map(float, split)
    #radius = 80  # approx 1/2 block in meters

    # creds from four square
    client_id=""
    client_secret=""
    
    #build the request
    url ='https://api.foursquare.com/v2/venues/search?limit=50&client_id={0}&client_secret={1}&ll={2},{3}&radius={4}&query={5}'
    url = url.format(client_id, client_secret, lat, lon, radius, term)    

    try:
        time.sleep(0.5)
        r = requests.get(url)
        checkins = sum( [v['stats']['checkinsCount'] for v in j['response']['venues']] )
        #print r.json()
        print lat, lon, checkins
        return checkins
    except Exception as e:
        print lat, lon, "ERROR!", e, "|", url, "|", r.json()
        return -1

radius = 80
term = 'FOOD'
intersections['4SQ_'+term+'_'+str(radius)] = intersections["INTERSECTION"].apply(number_businesses)
#intersections.head(n=10)
intersections.to_csv('intersections_4SQ_'+term+str(radius)+'.csv', index=False)

In [274]:
# ADD SPEED HUMP COLUMN TO INTERSECTIONS
def speed_hump(st):
    """
    Go through humps file and check if intersections
    has hump
    """
    return not humps[(st["CROSS STREET NAME"] == humps.FIRST_Main) & (st["ON STREET NAME"] == humps.FIRST_ToSt)].empty or \
           not humps[(st["ON STREET NAME"] == humps.FIRST_Main) & (st["CROSS STREET NAME"] == humps.FIRST_ToSt)].empty
  
intersections = pd.read_csv('crash_data/intersections.csv')
intersections['SPEED HUMP'] = intersections[["ON STREET NAME", "CROSS STREET NAME"]].apply(speed_hump, axis=1)
#intersections[['ON STREET NAME','CROSS STREET NAME','SPEED HUMP']].head(n=25)
intersections.to_csv('crash_data/intersections.csv', index=False)

In [24]:
# READ IN INTERSECTIONS TO CHECK
intersections = pd.read_csv('../crash_data/clean_intersections.csv')
#intersections.drop(intersections.name=="0.0_0.0", inplace=True)
intersections.drop(intersections.name=="44.686796_-75.5084676", inplace=True)


intersections.sort("name",  inplace=True, ascending=False,)
print len(intersections)
#intersections[intersections.name=="44.686796_-75.5084676"].head(n=10)
intersections.to_csv('../crash_data/clean_intersections.csv', index=False)
intersections.head()


38047
Out[24]:
name pedestr_killed following_too_closely bicycle pedicab large_com_veh_6_or_more_tires unsafe_lane_changing collisions_with_injuries passengers_injured err_confusn_ped_bike_other_ped ... cyclists_killed listening_using_headphones livery_vehicle sport_utility_station_wagon taxi_vehicle passing_or_lane_usage ambulance failure_to_yield_right_of_way driver_inattention_distraction traffic_control_disregarded
1 40.9120795948_-73.9007846219 0 0 0 0 0 0 1 1 0 ... 0 0 0 4 0 0 0 1 0 0
2 40.9120185285_-73.8999923797 0 0 0 0 1 0 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
3 40.9117918143_-73.898104152 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 40.9113268825_-73.9065599503 0 0 0 0 1 0 2 0 0 ... 0 0 0 1 2 0 0 2 0 0
5 40.9112391232_-73.9000876185 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 70 columns


In [ ]: