In [2]:
import dask.distributed
import dask.dataframe as dd
import pandas as pd
import numpy as np
import urllib, json
import sklearn.neighbors
In [3]:
client = dask.distributed.Client()
In [4]:
df = dd.read_parquet('/bigdata/citibike.parquet')
In [5]:
df.head()
Out[5]:
trip_duration
start_time
stop_time
start_station_id
start_station_name
start_station_latitude
start_station_longitude
end_station_id
end_station_name
end_station_latitude
end_station_longitude
bike_id
user_type
birth_year
gender
0
634
2013-07-01 00:00:00
2013-07-01 00:10:34
164
E 47 St & 2 Ave
40.753231
-73.970322
504
1 Ave & E 15 St
40.732220
-73.981659
16950
Customer
NaN
0
1
1547
2013-07-01 00:00:02
2013-07-01 00:25:49
388
W 26 St & 10 Ave
40.749718
-74.002953
459
W 20 St & 11 Ave
40.746746
-74.007759
19816
Customer
NaN
0
2
178
2013-07-01 00:01:04
2013-07-01 00:04:02
293
Lafayette St & E 8 St
40.730286
-73.990768
237
E 11 St & 2 Ave
40.730473
-73.986725
14548
Subscriber
1980.0
2
3
1580
2013-07-01 00:01:06
2013-07-01 00:27:26
531
Forsyth St & Broome St
40.718941
-73.992661
499
Broadway & W 60 St
40.769154
-73.981918
16063
Customer
NaN
0
4
757
2013-07-01 00:01:10
2013-07-01 00:13:47
382
University Pl & E 14 St
40.734928
-73.992004
410
Suffolk St & Stanton St
40.720665
-73.985176
19213
Subscriber
1986.0
1
In [6]:
df.tail()
Out[6]:
trip_duration
start_time
stop_time
start_station_id
start_station_name
start_station_latitude
start_station_longitude
end_station_id
end_station_name
end_station_latitude
end_station_longitude
bike_id
user_type
birth_year
gender
51474
441
2016-12-31 23:56:15
2017-01-01 00:03:36
284
Greenwich Ave & 8 Ave
40.739017
-74.002640
336
Sullivan St & Washington Sq
40.730476
-73.999062
16185
Subscriber
1974.0
2
51475
1026
2016-12-31 23:56:19
2017-01-01 00:13:26
281
Grand Army Plaza & Central Park S
40.764397
-73.973717
3143
5 Ave & E 78 St
40.776829
-73.963890
18267
None
1983.0
1
51476
1747
2016-12-31 23:56:35
2017-01-01 00:25:43
3424
E 106 St & Lexington Ave
40.791977
-73.945992
3295
Central Park W & W 96 St
40.791271
-73.964836
19899
Subscriber
1970.0
2
51477
951
2016-12-31 23:59:31
2017-01-01 00:15:23
3158
W 63 St & Broadway
40.771637
-73.982613
3169
Riverside Dr & W 82 St
40.787209
-73.981277
16866
Subscriber
1961.0
1
51478
1322
2016-12-31 23:59:56
2017-01-01 00:21:58
3263
Cooper Square & E 7 St
40.729237
-73.990868
498
Broadway & W 32 St
40.748550
-73.988083
25793
Subscriber
1985.0
1
In [7]:
# Count rows
df.start_station_id.compute().shape
Out[7]:
(36902025,)
In [8]:
# Select start and end columns as separate dataframes,
# apply range checks on lats and lons to eliminate corrupted rows
df_s = df[['start_station_id', 'start_station_name',
'start_station_latitude', 'start_station_longitude']]
df_e = df[['end_station_id', 'end_station_name',
'end_station_latitude', 'end_station_longitude']]
df_s = df_s[(df_s.start_station_latitude > 40.) & (df_s.start_station_latitude < 41.)]
df_s = df_s[(df_s.start_station_longitude + 74.0 > -0.25) & (df_s.start_station_longitude + 74.0 < 0.25)]
df_e = df_e[(df_e.end_station_latitude > 40.) & (df_e.end_station_latitude < 41.)]
df_e = df_e[(df_e.end_station_longitude + 74.0 > -0.25) & (df_e.end_station_longitude + 74.0 < 0.25)]
In [9]:
df_s.head()
Out[9]:
start_station_id
start_station_name
start_station_latitude
start_station_longitude
0
164
E 47 St & 2 Ave
40.753231
-73.970322
1
388
W 26 St & 10 Ave
40.749718
-74.002953
2
293
Lafayette St & E 8 St
40.730286
-73.990768
3
531
Forsyth St & Broome St
40.718941
-73.992661
4
382
University Pl & E 14 St
40.734928
-73.992004
Take the start locations, end locations, calculate mean latitude and longitude, convert to pandas, rename columns and concatenate. Sort by id. This will contain duplicates due to numerical precision and due to actual duplication due to some other reason.
In [10]:
a1 = df_s.groupby(['start_station_id', 'start_station_name']).mean().compute()
a2 = df_e.groupby(['end_station_id', 'end_station_name']).mean().compute()
a1.index = a1.index.rename(['id', 'name'])
a1.columns = ['lat', 'lon']
a2.index = a2.index.rename(['id', 'name'])
a2.columns = ['lat', 'lon']
trip_points = a1.append(a2).drop_duplicates().reset_index()
trip_points = trip_points.sort_values('id').reset_index(drop=True)
del a1, a2
trip_points.head()
Out[10]:
id
name
lat
lon
0
72
W 52 St & 11 Ave
40.767272
-73.993927
1
72
W 52 St & 11 Ave
40.767273
-73.993926
2
79
Franklin St & W Broadway
40.719118
-74.006668
3
79
Franklin St & W Broadway
40.719117
-74.006671
4
82
St James Pl & Pearl St
40.711174
-74.000166
In [11]:
trip_points.shape
Out[11]:
(1413, 4)
In [12]:
# Uncomment these two lines to fetch latest file and rename it
# !wget https://feeds.citibikenyc.com/stations/stations.json
# !mv stations.json stations.`date +"%Y.%m.%d.%H.%M"`.json
In [13]:
df_cur = pd.DataFrame(
json.loads(
open('stations.2017.04.20.09.43.json', 'r').read()
)['stationBeanList'])
df_cur.head()
Out[13]:
altitude
availableBikes
availableDocks
city
id
landMark
lastCommunicationTime
latitude
location
longitude
postalCode
stAddress1
stAddress2
stationName
statusKey
statusValue
testStation
totalDocks
0
20
16
72
2017-04-20 09:43:27 AM
40.767272
-73.993929
W 52 St & 11 Ave
W 52 St & 11 Ave
1
In Service
False
39
1
12
20
79
2017-04-20 09:42:54 AM
40.719116
-74.006667
Franklin St & W Broadway
Franklin St & W Broadway
1
In Service
False
33
2
0
0
82
2017-04-13 12:05:52 PM
40.711174
-74.000165
St James Pl & Pearl St
St James Pl & Pearl St
3
Not In Service
False
0
3
35
24
83
2017-04-20 09:42:19 AM
40.683826
-73.976323
Atlantic Ave & Fort Greene Pl
Atlantic Ave & Fort Greene Pl
1
In Service
False
62
4
5
34
116
2017-04-20 09:42:20 AM
40.741776
-74.001497
W 17 St & 8 Ave
W 17 St & 8 Ave
1
In Service
False
39
In [14]:
df_cur = df_cur[['id', 'stationName', 'latitude', 'longitude']].sort_values('id')
df_cur.head()
Out[14]:
id
stationName
latitude
longitude
0
72
W 52 St & 11 Ave
40.767272
-73.993929
1
79
Franklin St & W Broadway
40.719116
-74.006667
2
82
St James Pl & Pearl St
40.711174
-74.000165
3
83
Atlantic Ave & Fort Greene Pl
40.683826
-73.976323
4
116
W 17 St & 8 Ave
40.741776
-74.001497
In [15]:
# We see this has less than half the rows of trip_points
print(df_cur.shape)
(664, 4)
In [16]:
bt = sklearn.neighbors.BallTree(df_cur[['latitude', 'longitude']].values, metric='euclidean')
In [17]:
np.cos(np.deg2rad(40.7))
Out[17]:
0.75813433619765225
25 meters is approximately 0.000225˚ (not accounting for latitude) latitude will give a factor of cos(41˚N)=0.76 at 41˚N. So we are really testing if the point lies within an ellipse with a radius of 25.6 meters in latitude and 25.6*0.75=19.2 meters in longitude. Should be good enough.
In [18]:
0.000225*111194.9 # 111.1949 km per degree latitude
Out[18]:
25.018852499999998
In [19]:
query_results = bt.query_radius(trip_points.ix[:, 2:], 0.000225)
The value of each row indicates the row in df_cur that matches each row in trip_points A zero for the first and second rows indicates that those rows in trip points match with the zeroth row in df_cur.
In [20]:
results_list = [[int(y) for y in x.tolist()] for x in query_results]
print(len(results_list))
results_list
1413
Out[20]:
[[0],
[0],
[1],
[1],
[2],
[2],
[3],
[3],
[4],
[4],
[5],
[5],
[6],
[6],
[7],
[7],
[8],
[8],
[9],
[9],
[10],
[10],
[11],
[11],
[12],
[12],
[13],
[13],
[14],
[14],
[15],
[15],
[16],
[16],
[17],
[17],
[18],
[18],
[],
[],
[19],
[19],
[20],
[20],
[21],
[21],
[22],
[22],
[23],
[23],
[24],
[24],
[25],
[25],
[26],
[26],
[27],
[27],
[28],
[28],
[],
[],
[29],
[29],
[30],
[30],
[31],
[31],
[32],
[32],
[33],
[33],
[34],
[34],
[651],
[],
[651],
[],
[35],
[35],
[36],
[36],
[37],
[37],
[38],
[38],
[39],
[39],
[],
[40],
[40],
[],
[],
[],
[42],
[42],
[43],
[43],
[44],
[44],
[45],
[45],
[46],
[46],
[],
[],
[],
[],
[47],
[47],
[48],
[48],
[49],
[49],
[50],
[50],
[],
[],
[51],
[51],
[52],
[52],
[53],
[53],
[54],
[54],
[55],
[55],
[56],
[56],
[],
[],
[57],
[57],
[58],
[58],
[59],
[59],
[60],
[60],
[61],
[61],
[62],
[62],
[],
[],
[63],
[63],
[64],
[64],
[65],
[65],
[66],
[66],
[],
[67],
[67],
[67],
[67],
[],
[68],
[68],
[69],
[69],
[],
[],
[71],
[71],
[72],
[72],
[73],
[73],
[],
[],
[74],
[74],
[],
[],
[],
[],
[76],
[76],
[77],
[77],
[78],
[78],
[],
[],
[],
[],
[],
[],
[79],
[79],
[80],
[80],
[81],
[81],
[82],
[82],
[83],
[83],
[84],
[84],
[85],
[85],
[86],
[86],
[87],
[87],
[88],
[88],
[89],
[89],
[90],
[90],
[90],
[90],
[91],
[91],
[92],
[92],
[],
[],
[93],
[93],
[94],
[94],
[95],
[95],
[],
[],
[96],
[],
[],
[96],
[],
[97],
[97],
[],
[98],
[98],
[99],
[99],
[100],
[100],
[101],
[101],
[102],
[102],
[103],
[103],
[104],
[104],
[105],
[105],
[],
[],
[],
[],
[106],
[106],
[107],
[107],
[108],
[108],
[109],
[109],
[110],
[110],
[111],
[111],
[112],
[112],
[113],
[113],
[114],
[114],
[115],
[115],
[116],
[116],
[117],
[117],
[118],
[118],
[119],
[119],
[120],
[120],
[],
[121],
[121],
[],
[122],
[122],
[123],
[123],
[124],
[124],
[125],
[125],
[126],
[126],
[127],
[127],
[128],
[128],
[129],
[129],
[130],
[130],
[131],
[131],
[132],
[132],
[133],
[133],
[134],
[134],
[135],
[135],
[136],
[136],
[137],
[137],
[138],
[138],
[139],
[139],
[140],
[140],
[],
[],
[141],
[141],
[142],
[142],
[143],
[143],
[144],
[144],
[],
[],
[],
[],
[145],
[145],
[146],
[146],
[147],
[147],
[148],
[148],
[149],
[149],
[150],
[150],
[151],
[151],
[],
[],
[152],
[152],
[153],
[153],
[154],
[154],
[155],
[155],
[156],
[156],
[157],
[157],
[158],
[158],
[159],
[159],
[160],
[160],
[161],
[161],
[162],
[162],
[163],
[163],
[164],
[164],
[165],
[165],
[166],
[166],
[167],
[167],
[168],
[168],
[169],
[169],
[170],
[170],
[],
[],
[171],
[171],
[172],
[172],
[173],
[173],
[174],
[174],
[175],
[175],
[176],
[176],
[177],
[177],
[178],
[178],
[179],
[179],
[180],
[180],
[181],
[181],
[182],
[182],
[183],
[183],
[184],
[184],
[185],
[185],
[186],
[186],
[187],
[187],
[188],
[188],
[189],
[189],
[190],
[],
[190],
[],
[191],
[191],
[192],
[192],
[],
[],
[193],
[193],
[194],
[194],
[195],
[195],
[196],
[196],
[197],
[197],
[198],
[198],
[199],
[199],
[200],
[200],
[201],
[201],
[202],
[202],
[203],
[203],
[204],
[204],
[204],
[204],
[205],
[205],
[206],
[206],
[207],
[207],
[208],
[208],
[209],
[209],
[210],
[210],
[211],
[211],
[212],
[212],
[213],
[213],
[214],
[214],
[215],
[215],
[216],
[216],
[217],
[217],
[218],
[218],
[219],
[219],
[220],
[220],
[221],
[221],
[],
[],
[],
[],
[222],
[222],
[223],
[223],
[224],
[224],
[225],
[225],
[226],
[226],
[227],
[227],
[228],
[228],
[229],
[229],
[230],
[230],
[231],
[231],
[],
[],
[232],
[232],
[233],
[233],
[234],
[234],
[235],
[235],
[236],
[236],
[237],
[237],
[238],
[238],
[239],
[239],
[240],
[240],
[241],
[241],
[242],
[242],
[243],
[243],
[244],
[244],
[],
[],
[245],
[245],
[246],
[246],
[247],
[247],
[248],
[248],
[249],
[249],
[250],
[250],
[251],
[251],
[252],
[252],
[253],
[253],
[254],
[254],
[255],
[255],
[256],
[256],
[257],
[257],
[258],
[258],
[259],
[259],
[259],
[259],
[260],
[260],
[261],
[261],
[262],
[262],
[263],
[263],
[],
[],
[264],
[264],
[],
[],
[265],
[265],
[266],
[266],
[267],
[267],
[268],
[268],
[269],
[475],
[269],
[269],
[475],
[269],
[270],
[270],
[271],
[271],
[271],
[],
[],
[271],
[272],
[272],
[],
[],
[483],
[483],
[273],
[273],
[274],
[274],
[275],
[275],
[276],
[276],
[277],
[277],
[],
[278],
[278],
[],
[279],
[279],
[280],
[280],
[281],
[281],
[282],
[282],
[283],
[283],
[284],
[284],
[285],
[285],
[286],
[286],
[287],
[287],
[],
[],
[],
[],
[288],
[288],
[],
[289],
[],
[289],
[290],
[290],
[291],
[291],
[292],
[292],
[],
[293],
[293],
[],
[294],
[294],
[295],
[295],
[296],
[296],
[297],
[297],
[298],
[298],
[299],
[299],
[300],
[300],
[301],
[301],
[302],
[302],
[],
[],
[303],
[303],
[],
[],
[304],
[304],
[305],
[305],
[306],
[306],
[],
[],
[],
[],
[],
[307],
[307],
[],
[],
[],
[],
[],
[476],
[],
[],
[308],
[308],
[309],
[309],
[310],
[310],
[311],
[311],
[312],
[312],
[313],
[313],
[314],
[314],
[315],
[315],
[316],
[316],
[317],
[317],
[318],
[318],
[319],
[319],
[320],
[320],
[321],
[321],
[322],
[322],
[323],
[323],
[324],
[324],
[325],
[325],
[326],
[326],
[327],
[327],
[328],
[328],
[329],
[329],
[330],
[330],
[331],
[331],
[332],
[332],
[333],
[333],
[334],
[334],
[335],
[335],
[336],
[336],
[337],
[337],
[338],
[338],
[339],
[339],
[340],
[340],
[341],
[341],
[342],
[342],
[343],
[343],
[],
[],
[344],
[344],
[345],
[345],
[346],
[346],
[347],
[347],
[],
[],
[348],
[348],
[349],
[349],
[350],
[350],
[351],
[351],
[],
[],
[352],
[352],
[353],
[353],
[354],
[354],
[355],
[355],
[356],
[356],
[357],
[357],
[358],
[358],
[],
[],
[],
[],
[],
[],
[359],
[359],
[360],
[360],
[361],
[361],
[362],
[362],
[],
[],
[],
[],
[363],
[363],
[364],
[364],
[365],
[365],
[366],
[366],
[367],
[367],
[368],
[368],
[369],
[369],
[370],
[370],
[371],
[371],
[372],
[372],
[373],
[373],
[374],
[374],
[375],
[375],
[376],
[376],
[377],
[377],
[378],
[378],
[378],
[378],
[379],
[379],
[380],
[380],
[381],
[381],
[382],
[382],
[383],
[383],
[384],
[384],
[385],
[385],
[386],
[386],
[387],
[387],
[388],
[388],
[389],
[389],
[390],
[390],
[],
[],
[],
[],
[391],
[391],
[392],
[392],
[393],
[393],
[394],
[394],
[],
[],
[395],
[395],
[396],
[396],
[397],
[397],
[398],
[398],
[399],
[399],
[400],
[400],
[401],
[401],
[402],
[402],
[403],
[403],
[404],
[404],
[],
[],
[405],
[405],
[406],
[406],
[407],
[407],
[],
[],
[],
[],
[408],
[408],
[409],
[409],
[410],
[410],
[411],
[411],
[412],
[412],
[413],
[413],
[414],
[414],
[415],
[415],
[416],
[416],
[417],
[417],
[418],
[418],
[419],
[419],
[419],
[419],
[420],
[420],
[421],
[421],
[422],
[422],
[423],
[423],
[424],
...]
This shows for each row in trip points, there are zero or one matches, indicating 25 meters was an appropriate choice. At 50 meters, some rows have more than one match.
In [21]:
max((len(x) for x in results_list))
Out[21]:
1
In [22]:
min((len(x) for x in results_list))
Out[22]:
0
In [23]:
# nan if no match, else row_id in df_cur
trip_points['match_id'] = np.array([np.float64(x)
if x.shape[0] > 0 else np.nan
for x in query_results.T])
In [24]:
trip_points.head()
Out[24]:
id
name
lat
lon
match_id
0
72
W 52 St & 11 Ave
40.767272
-73.993927
0.0
1
72
W 52 St & 11 Ave
40.767273
-73.993926
0.0
2
79
Franklin St & W Broadway
40.719118
-74.006668
1.0
3
79
Franklin St & W Broadway
40.719117
-74.006671
1.0
4
82
St James Pl & Pearl St
40.711174
-74.000166
2.0
In [25]:
df_cur['id'] = df_cur['id'].astype(np.float64)
In [26]:
lookup_table = trip_points.merge(df_cur, left_on='match_id', right_index=True, how='left')
lookup_table
Out[26]:
id_x
name
lat
lon
match_id
id_y
stationName
latitude
longitude
0
72
W 52 St & 11 Ave
40.767272
-73.993927
0.0
72.0
W 52 St & 11 Ave
40.767272
-73.993929
1
72
W 52 St & 11 Ave
40.767273
-73.993926
0.0
72.0
W 52 St & 11 Ave
40.767272
-73.993929
2
79
Franklin St & W Broadway
40.719118
-74.006668
1.0
79.0
Franklin St & W Broadway
40.719116
-74.006667
3
79
Franklin St & W Broadway
40.719117
-74.006671
1.0
79.0
Franklin St & W Broadway
40.719116
-74.006667
4
82
St James Pl & Pearl St
40.711174
-74.000166
2.0
82.0
St James Pl & Pearl St
40.711174
-74.000165
5
82
St James Pl & Pearl St
40.711174
-74.000167
2.0
82.0
St James Pl & Pearl St
40.711174
-74.000165
6
83
Atlantic Ave & Fort Greene Pl
40.683826
-73.976325
3.0
83.0
Atlantic Ave & Fort Greene Pl
40.683826
-73.976323
7
83
Atlantic Ave & Fort Greene Pl
40.683826
-73.976322
3.0
83.0
Atlantic Ave & Fort Greene Pl
40.683826
-73.976323
8
116
W 17 St & 8 Ave
40.741777
-74.001498
4.0
116.0
W 17 St & 8 Ave
40.741776
-74.001497
9
116
W 17 St & 8 Ave
40.741776
-74.001493
4.0
116.0
W 17 St & 8 Ave
40.741776
-74.001497
10
119
Park Ave & St Edwards St
40.696094
-73.978033
5.0
119.0
Park Ave & St Edwards St
40.696089
-73.978034
11
119
Park Ave & St Edwards St
40.696093
-73.978030
5.0
119.0
Park Ave & St Edwards St
40.696089
-73.978034
12
120
Lexington Ave & Classon Ave
40.686766
-73.959280
6.0
120.0
Lexington Ave & Classon Ave
40.686768
-73.959282
13
120
Lexington Ave & Classon Ave
40.686768
-73.959284
6.0
120.0
Lexington Ave & Classon Ave
40.686768
-73.959282
14
127
Barrow St & Hudson St
40.731725
-74.006747
7.0
127.0
Barrow St & Hudson St
40.731724
-74.006744
15
127
Barrow St & Hudson St
40.731723
-74.006745
7.0
127.0
Barrow St & Hudson St
40.731724
-74.006744
16
128
MacDougal St & Prince St
40.727104
-74.002966
8.0
128.0
MacDougal St & Prince St
40.727103
-74.002971
17
128
MacDougal St & Prince St
40.727104
-74.002966
8.0
128.0
MacDougal St & Prince St
40.727103
-74.002971
18
137
E 56 St & Madison Ave
40.761628
-73.972924
9.0
137.0
E 56 St & Madison Ave
40.761628
-73.972924
19
137
E 56 St & Madison Ave
40.761625
-73.972924
9.0
137.0
E 56 St & Madison Ave
40.761628
-73.972924
20
143
Clinton St & Joralemon St
40.692394
-73.993375
10.0
143.0
Clinton St & Joralemon St
40.692395
-73.993379
21
143
Clinton St & Joralemon St
40.692393
-73.993375
10.0
143.0
Clinton St & Joralemon St
40.692395
-73.993379
22
144
Nassau St & Navy St
40.698401
-73.980691
11.0
144.0
Nassau St & Navy St
40.698399
-73.980689
23
144
Nassau St & Navy St
40.698402
-73.980690
11.0
144.0
Nassau St & Navy St
40.698399
-73.980689
24
146
Hudson St & Reade St
40.716253
-74.009111
12.0
146.0
Hudson St & Reade St
40.716250
-74.009106
25
146
Hudson St & Reade St
40.716250
-74.009108
12.0
146.0
Hudson St & Reade St
40.716250
-74.009106
26
147
Greenwich St & Warren St
40.715423
-74.011220
13.0
147.0
Greenwich St & Warren St
40.715422
-74.011220
27
147
Greenwich St & Warren St
40.715424
-74.011226
13.0
147.0
Greenwich St & Warren St
40.715422
-74.011220
28
150
E 2 St & Avenue C
40.720874
-73.980854
14.0
150.0
E 2 St & Avenue C
40.720874
-73.980858
29
150
E 2 St & Avenue C
40.720875
-73.980855
14.0
150.0
E 2 St & Avenue C
40.720874
-73.980858
...
...
...
...
...
...
...
...
...
...
1383
3422
Columbia St & Degraw St
40.685927
-74.002429
638.0
3422.0
Columbia St & Degraw St
40.685930
-74.002424
1384
3423
West Drive & Prospect Park West
40.661064
-73.979458
639.0
3423.0
West Drive & Prospect Park West
40.661063
-73.979453
1385
3423
West Drive & Prospect Park West
40.661066
-73.979451
639.0
3423.0
West Drive & Prospect Park West
40.661063
-73.979453
1386
3424
E 106 St & Lexington Ave
40.791978
-73.945991
640.0
3424.0
E 106 St & Lexington Ave
40.791976
-73.945993
1387
3424
E 106 St & Lexington Ave
40.791976
-73.945994
640.0
3424.0
E 106 St & Lexington Ave
40.791976
-73.945993
1388
3425
2 Ave & E 104 St
40.789212
-73.943714
641.0
3425.0
2 Ave & E 104 St
40.789211
-73.943708
1389
3425
2 Ave & E 104 St
40.789211
-73.943708
641.0
3425.0
2 Ave & E 104 St
40.789211
-73.943708
1390
3427
Lafayette St & Jersey St
40.724303
-73.996004
642.0
3427.0
Lafayette St & Jersey St
40.724305
-73.996010
1391
3427
Lafayette St & Jersey St
40.724304
-73.996006
642.0
3427.0
Lafayette St & Jersey St
40.724305
-73.996010
1392
3428
8 Ave & W 16 St
40.740986
-74.001695
643.0
3428.0
8 Ave & W 16 St
40.740983
-74.001702
1393
3428
8 Ave & W 16 St
40.740985
-74.001704
643.0
3428.0
8 Ave & W 16 St
40.740983
-74.001702
1394
3429
Hanson Pl & Ashland Pl
40.685067
-73.977907
644.0
3429.0
Hanson Pl & Ashland Pl
40.685068
-73.977908
1395
3429
Hanson Pl & Ashland Pl
40.685065
-73.977906
644.0
3429.0
Hanson Pl & Ashland Pl
40.685068
-73.977908
1396
3430
Richardson St & N Henry St
40.719078
-73.942233
645.0
3430.0
Richardson St & N Henry St
40.719079
-73.942237
1397
3430
Richardson St & N Henry St
40.719081
-73.942231
645.0
3430.0
Richardson St & N Henry St
40.719079
-73.942237
1398
3431
E 35 St & 3 Ave
40.746526
-73.977886
646.0
3431.0
E 35 St & 3 Ave
40.746524
-73.977885
1399
3431
E 35 St & 3 Ave
40.746523
-73.977879
646.0
3431.0
E 35 St & 3 Ave
40.746524
-73.977885
1400
3432
Bike in Movie Night | Prospect Park Bandshell
40.662987
-73.976919
NaN
NaN
NaN
NaN
NaN
1401
3432
Bike in Movie Night | Prospect Park Bandshell
40.662988
-73.976922
NaN
NaN
NaN
NaN
NaN
1402
3434
W 88 St & West End Ave
40.790251
-73.977181
647.0
3434.0
W 88 St & West End Ave
40.790254
-73.977183
1403
3434
W 88 St & West End Ave
40.790254
-73.977179
647.0
3434.0
W 88 St & West End Ave
40.790254
-73.977183
1404
3435
Grand St & Elizabeth St
40.718822
-73.995959
648.0
3435.0
Grand St & Elizabeth St
40.718822
-73.995960
1405
3435
Grand St & Elizabeth St
40.718820
-73.995958
648.0
3435.0
Grand St & Elizabeth St
40.718822
-73.995960
1406
3436
Greenwich St & Hubert St
40.721319
-74.010065
649.0
3436.0
Greenwich St & Hubert St
40.721319
-74.010065
1407
3436
Greenwich St & Hubert St
40.721318
-74.010060
649.0
3436.0
Greenwich St & Hubert St
40.721319
-74.010065
1408
3438
E 76 St & 3 Ave
40.772249
-73.958419
650.0
3438.0
E 76 St & 3 Ave
40.772249
-73.958421
1409
3438
E 76 St & 3 Ave
40.772249
-73.958419
650.0
3438.0
E 76 St & 3 Ave
40.772249
-73.958421
1410
3439
Broadway & E 22 St - Valet Scan
40.740345
-73.989548
169.0
402.0
Broadway & E 22 St
40.740343
-73.989551
1411
3440
Fulton St & Adams St
40.692416
-73.989497
651.0
3440.0
Fulton St & Adams St
40.692418
-73.989495
1412
3440
Fulton St & Adams St
40.692417
-73.989502
651.0
3440.0
Fulton St & Adams St
40.692418
-73.989495
1413 rows × 9 columns
In [27]:
defunct_stations = lookup_table[np.isnan(lookup_table.match_id)]
defunct_stations
Out[27]:
id_x
name
lat
lon
match_id
id_y
stationName
latitude
longitude
38
160
E 37 St & Lexington Ave
40.748195
-73.978309
NaN
NaN
NaN
NaN
NaN
39
160
E 37 St & Lexington Ave
40.748192
-73.978310
NaN
NaN
NaN
NaN
NaN
60
218
Gallatin Pl & Livingston St
40.690286
-73.987071
NaN
NaN
NaN
NaN
NaN
61
218
Gallatin Pl & Livingston St
40.690285
-73.987069
NaN
NaN
NaN
NaN
NaN
75
233
Cadman Plaza W & Pierrepont St
40.694757
-73.990522
NaN
NaN
NaN
NaN
NaN
77
233
Cadman Plaza W & Pierrepont St
40.694754
-73.990530
NaN
NaN
NaN
NaN
NaN
88
242
Flushing Ave & Carlton Ave
40.697882
-73.973503
NaN
NaN
NaN
NaN
NaN
91
242
Flushing Ave & Carlton Ave
40.697882
-73.973503
NaN
NaN
NaN
NaN
NaN
92
243
Fulton St & Rockwell Pl
40.688151
-73.979105
NaN
NaN
NaN
NaN
NaN
93
243
Fulton St & Rockwell Pl
40.688129
-73.979028
NaN
NaN
NaN
NaN
NaN
104
250
Lafayette St & Jersey St
40.724560
-73.995651
NaN
NaN
NaN
NaN
NaN
105
250
Lafayette St & Jersey St
40.724557
-73.995650
NaN
NaN
NaN
NaN
NaN
106
250
Lafayette St & Jersey St N
40.724560
-73.995653
NaN
NaN
NaN
NaN
NaN
107
250
Lafayette St & Jersey St N
40.724559
-73.995649
NaN
NaN
NaN
NaN
NaN
116
255
NYCBS Depot - SSP
40.646765
-74.016513
NaN
NaN
NaN
NaN
NaN
117
255
NYCBS Depot - SSP
40.646766
-74.016510
NaN
NaN
NaN
NaN
NaN
130
263
Elizabeth St & Hester St
40.717329
-73.996356
NaN
NaN
NaN
NaN
NaN
131
263
Elizabeth St & Hester St
40.717329
-73.996351
NaN
NaN
NaN
NaN
NaN
144
271
Ashland Pl & Hanson Pl
40.685276
-73.978059
NaN
NaN
NaN
NaN
NaN
145
271
Ashland Pl & Hanson Pl
40.685281
-73.978057
NaN
NaN
NaN
NaN
NaN
154
279
Sands St & Gold St
40.699870
-73.982716
NaN
NaN
NaN
NaN
NaN
159
279
Sands St & Gold St
40.699870
-73.982720
NaN
NaN
NaN
NaN
NaN
164
282
Kent Ave & S 11 St
40.708024
-73.968367
NaN
NaN
NaN
NaN
NaN
165
282
Kent Ave & S 11 St
40.708025
-73.968372
NaN
NaN
NaN
NaN
NaN
172
290
2 Ave & E 58 St
40.760204
-73.964786
NaN
NaN
NaN
NaN
NaN
173
290
2 Ave & E 58 St
40.760206
-73.964780
NaN
NaN
NaN
NaN
NaN
176
293
Lafayette St & E 8 St
40.730284
-73.990770
NaN
NaN
NaN
NaN
NaN
177
293
Lafayette St & E 8 St
40.730285
-73.990771
NaN
NaN
NaN
NaN
NaN
178
294
Washington Square E
40.730497
-73.995719
NaN
NaN
NaN
NaN
NaN
179
294
Washington Square E
40.730495
-73.995721
NaN
NaN
NaN
NaN
NaN
...
...
...
...
...
...
...
...
...
...
1053
3229
Marcy Ave & MacDonough St
40.680620
-73.946245
NaN
NaN
NaN
NaN
NaN
1054
3230
Penn Station Valet
40.751282
-73.996927
NaN
NaN
NaN
NaN
NaN
1055
3230
Penn Station Valet
40.751279
-73.996926
NaN
NaN
NaN
NaN
NaN
1062
3234
E 40 St & Madison Ave
40.751593
-73.980427
NaN
NaN
NaN
NaN
NaN
1063
3234
E 40 St & Madison Ave
40.751592
-73.980430
NaN
NaN
NaN
NaN
NaN
1070
3237
21 St & 41 Ave
40.753832
-73.942681
NaN
NaN
NaN
NaN
NaN
1071
3237
21 St & 41 Ave
40.753833
-73.942676
NaN
NaN
NaN
NaN
NaN
1074
3239
Bressler
40.646539
-74.016588
NaN
NaN
NaN
NaN
NaN
1083
3245
Kiosk in a box Motivate
40.646677
-74.016298
NaN
NaN
NaN
NaN
NaN
1084
3246
Montague St & Clinton St
40.694282
-73.992300
NaN
NaN
NaN
NaN
NaN
1085
3246
Montague St & Clinton St
40.694284
-73.992301
NaN
NaN
NaN
NaN
NaN
1088
3250
NYCBS Depot - PIT
40.716912
-73.983843
NaN
NaN
NaN
NaN
NaN
1089
3250
NYCBS Depot - PIT
40.716913
-73.983843
NaN
NaN
NaN
NaN
NaN
1092
3253
Bike The Branches - Central Branch
40.672804
-73.968868
NaN
NaN
NaN
NaN
NaN
1093
3253
Bike The Branches - Central Branch
40.672806
-73.968864
NaN
NaN
NaN
NaN
NaN
1094
3254
Soissons Landing
40.692320
-74.014862
NaN
NaN
NaN
NaN
NaN
1095
3254
Soissons Landing
40.692319
-74.014863
NaN
NaN
NaN
NaN
NaN
1100
3257
Adventures NYC
40.773769
-73.971169
NaN
NaN
NaN
NaN
NaN
1101
3257
Adventures NYC
40.773771
-73.971169
NaN
NaN
NaN
NaN
NaN
1110
3264
E 61 St & Park Ave
40.764207
-73.969101
NaN
NaN
NaN
NaN
NaN
1111
3264
E 61 St & Park Ave
40.764210
-73.969101
NaN
NaN
NaN
NaN
NaN
1112
3265
E 2 St & 2 Ave
40.724565
-73.989439
NaN
NaN
NaN
NaN
NaN
1113
3265
E 2 St & 2 Ave
40.724564
-73.989439
NaN
NaN
NaN
NaN
NaN
1114
3266
Kiosk in a box Deployment
40.708611
-73.928507
NaN
NaN
NaN
NaN
NaN
1115
3266
Kiosk in a box Deployment
40.708609
-73.928510
NaN
NaN
NaN
NaN
NaN
1138
3291
Riverside Dr & W 91 St
40.793373
-73.976851
NaN
NaN
NaN
NaN
NaN
1139
3291
Riverside Dr & W 91 St
40.793372
-73.976853
NaN
NaN
NaN
NaN
NaN
1311
3385
2 Ave & E 105 St
40.789818
-73.942963
NaN
NaN
NaN
NaN
NaN
1400
3432
Bike in Movie Night | Prospect Park Bandshell
40.662987
-73.976919
NaN
NaN
NaN
NaN
NaN
1401
3432
Bike in Movie Night | Prospect Park Bandshell
40.662988
-73.976922
NaN
NaN
NaN
NaN
NaN
174 rows × 9 columns
In [28]:
defunct_stations = defunct_stations[['id_x', 'name']].drop_duplicates()
defunct_stations
Out[28]:
id_x
name
38
160
E 37 St & Lexington Ave
60
218
Gallatin Pl & Livingston St
75
233
Cadman Plaza W & Pierrepont St
88
242
Flushing Ave & Carlton Ave
92
243
Fulton St & Rockwell Pl
104
250
Lafayette St & Jersey St
106
250
Lafayette St & Jersey St N
116
255
NYCBS Depot - SSP
130
263
Elizabeth St & Hester St
144
271
Ashland Pl & Hanson Pl
154
279
Sands St & Gold St
164
282
Kent Ave & S 11 St
172
290
2 Ave & E 58 St
176
293
Lafayette St & E 8 St
178
294
Washington Square E
186
298
3 Ave & Schermerhorn St
188
300
Shevchenko Pl & E 7 St
189
300
Shevchenko Pl & E 6 St
222
314
Montague St & Clinton St
230
318
E 43 St & Vanderbilt Ave
233
319
Park Pl & Church St
236
320
Church St & Leonard St
256
329
Greenwich St & N Moore St
258
329
Greenwich St & North Moore St
290
347
W Houston St & Hudson St
332
367
E 53 St & Lexington Ave
342
375
Mercer St & Bleecker St
343
375
Mercer St & Bleecker St S
360
384
Fulton St & Waverly Ave
400
404
9 Ave & W 14 St
...
...
...
857
3104
KentAve&N7 St
858
3104
Kent Ave & N 7 St
919
3133
E 67 St & Lexington Ave
920
3133
E 67 St & Park Ave
931
3138
E 77 St & Park Ave
953
3149
E 82 St & 2 Ave
961
3153
E 71 St & 2 Ave
963
3154
E 77 St & 3 Ave
1017
3181
Soissons Landing
1019
3182
Yankee Ferry Terminal
1024
3186
Grove St PATH
1038
3219
NYCBS Depot - STY
1043
3222
Hanson Pl & St Felix St
1052
3229
Marcy Ave & MacDonough St
1054
3230
Penn Station Valet
1062
3234
E 40 St & Madison Ave
1070
3237
21 St & 41 Ave
1074
3239
Bressler
1083
3245
Kiosk in a box Motivate
1084
3246
Montague St & Clinton St
1088
3250
NYCBS Depot - PIT
1092
3253
Bike The Branches - Central Branch
1094
3254
Soissons Landing
1100
3257
Adventures NYC
1110
3264
E 61 St & Park Ave
1112
3265
E 2 St & 2 Ave
1114
3266
Kiosk in a box Deployment
1138
3291
Riverside Dr & W 91 St
1311
3385
2 Ave & E 105 St
1400
3432
Bike in Movie Night | Prospect Park Bandshell
90 rows × 2 columns
In [29]:
defunct_trips_start = df.merge(defunct_stations, left_on=['start_station_id', 'start_station_name'],
right_on=['id_x', 'name'])
defunct_trips_end = df.merge(defunct_stations, left_on=['end_station_id', 'end_station_name'],
right_on=['id_x', 'name'])
defunct_trips_all = (defunct_trips_start.append(defunct_trips_end).compute().drop_duplicates())
defunct_trips_all
Out[29]:
trip_duration
start_time
stop_time
start_station_id
start_station_name
start_station_latitude
start_station_longitude
end_station_id
end_station_name
end_station_latitude
end_station_longitude
bike_id
user_type
birth_year
gender
id_x
name
0
178
2013-07-01 00:01:04
2013-07-01 00:04:02
293
Lafayette St & E 8 St
40.730286
-73.990768
237
E 11 St & 2 Ave
40.730473
-73.986725
14548
Subscriber
1980.0
2
293
Lafayette St & E 8 St
1
550
2013-07-01 00:01:59
2013-07-01 00:11:09
293
Lafayette St & E 8 St
40.730286
-73.990768
394
E 9 St & Avenue C
40.725212
-73.977684
16746
Customer
NaN
0
293
Lafayette St & E 8 St
2
615
2013-07-01 05:48:21
2013-07-01 05:58:36
293
Lafayette St & E 8 St
40.730286
-73.990768
507
E 25 St & 2 Ave
40.739124
-73.979736
19080
Subscriber
1984.0
2
293
Lafayette St & E 8 St
3
307
2013-07-01 06:47:47
2013-07-01 06:52:54
293
Lafayette St & E 8 St
40.730286
-73.990768
445
E 10 St & Avenue A
40.727409
-73.981422
16105
Subscriber
1973.0
1
293
Lafayette St & E 8 St
4
387
2013-07-01 07:11:46
2013-07-01 07:18:13
293
Lafayette St & E 8 St
40.730286
-73.990768
497
E 17 St & Broadway
40.737049
-73.990089
15952
Subscriber
1954.0
1
293
Lafayette St & E 8 St
5
481
2013-07-01 07:13:30
2013-07-01 07:21:31
293
Lafayette St & E 8 St
40.730286
-73.990768
469
Broadway & W 53 St
40.763439
-73.982681
16108
Subscriber
1968.0
1
293
Lafayette St & E 8 St
6
915
2013-07-01 07:42:55
2013-07-01 07:58:10
293
Lafayette St & E 8 St
40.730286
-73.990768
456
E 53 St & Madison Ave
40.759712
-73.974022
18485
Subscriber
1983.0
1
293
Lafayette St & E 8 St
7
332
2013-07-01 07:53:56
2013-07-01 07:59:28
293
Lafayette St & E 8 St
40.730286
-73.990768
380
W 4 St & 7 Ave S
40.734013
-74.002937
19540
Subscriber
1983.0
1
293
Lafayette St & E 8 St
8
687
2013-07-01 07:53:59
2013-07-01 08:05:26
293
Lafayette St & E 8 St
40.730286
-73.990768
519
Pershing Square N
40.751884
-73.977699
16270
Subscriber
1981.0
1
293
Lafayette St & E 8 St
9
472
2013-07-01 07:58:52
2013-07-01 08:06:44
293
Lafayette St & E 8 St
40.730286
-73.990768
383
Greenwich Ave & Charles St
40.735237
-74.000275
15830
Subscriber
1989.0
2
293
Lafayette St & E 8 St
10
201
2013-07-01 08:03:14
2013-07-01 08:06:35
293
Lafayette St & E 8 St
40.730286
-73.990768
445
E 10 St & Avenue A
40.727409
-73.981422
17172
Subscriber
1983.0
1
293
Lafayette St & E 8 St
11
1473
2013-07-01 08:07:14
2013-07-01 08:31:47
293
Lafayette St & E 8 St
40.730286
-73.990768
484
W 44 St & 5 Ave
40.755001
-73.980141
18883
Subscriber
1979.0
1
293
Lafayette St & E 8 St
12
1368
2013-07-01 08:09:13
2013-07-01 08:32:01
293
Lafayette St & E 8 St
40.730286
-73.990768
522
E 51 St & Lexington Ave
40.757149
-73.972076
16236
Subscriber
1961.0
2
293
Lafayette St & E 8 St
13
507
2013-07-01 08:20:27
2013-07-01 08:28:54
293
Lafayette St & E 8 St
40.730286
-73.990768
151
Cleveland Pl & Spring St
40.721817
-73.997200
17838
Subscriber
1985.0
2
293
Lafayette St & E 8 St
14
470
2013-07-01 08:22:13
2013-07-01 08:30:03
293
Lafayette St & E 8 St
40.730286
-73.990768
546
E 30 St & Park Ave S
40.744450
-73.983032
18607
Subscriber
1985.0
1
293
Lafayette St & E 8 St
15
673
2013-07-01 08:23:45
2013-07-01 08:34:58
293
Lafayette St & E 8 St
40.730286
-73.990768
546
E 30 St & Park Ave S
40.744450
-73.983032
18849
Subscriber
1981.0
2
293
Lafayette St & E 8 St
16
460
2013-07-01 08:25:19
2013-07-01 08:32:59
293
Lafayette St & E 8 St
40.730286
-73.990768
377
6 Ave & Canal St
40.722439
-74.005661
17104
Subscriber
1970.0
1
293
Lafayette St & E 8 St
17
558
2013-07-01 08:30:25
2013-07-01 08:39:43
293
Lafayette St & E 8 St
40.730286
-73.990768
320
Church St & Leonard St
40.717442
-74.005836
18092
Subscriber
1976.0
1
293
Lafayette St & E 8 St
18
458
2013-07-01 08:37:16
2013-07-01 08:44:54
293
Lafayette St & E 8 St
40.730286
-73.990768
445
E 10 St & Avenue A
40.727409
-73.981422
19981
Customer
NaN
0
293
Lafayette St & E 8 St
19
1119
2013-07-01 08:44:37
2013-07-01 09:03:16
293
Lafayette St & E 8 St
40.730286
-73.990768
330
Reade St & Broadway
40.714504
-74.005630
17955
Subscriber
1973.0
1
293
Lafayette St & E 8 St
20
453
2013-07-01 08:50:18
2013-07-01 08:57:51
293
Lafayette St & E 8 St
40.730286
-73.990768
323
Lawrence St & Willoughby St
40.692360
-73.986320
15215
Subscriber
1974.0
2
293
Lafayette St & E 8 St
21
453
2013-07-01 08:50:39
2013-07-01 08:58:12
293
Lafayette St & E 8 St
40.730286
-73.990768
151
Cleveland Pl & Spring St
40.721817
-73.997200
18613
Subscriber
1953.0
2
293
Lafayette St & E 8 St
22
1055
2013-07-01 09:02:12
2013-07-01 09:19:47
293
Lafayette St & E 8 St
40.730286
-73.990768
360
William St & Pine St
40.707180
-74.008873
19020
Subscriber
1986.0
1
293
Lafayette St & E 8 St
23
235
2013-07-01 09:03:56
2013-07-01 09:07:51
293
Lafayette St & E 8 St
40.730286
-73.990768
445
E 10 St & Avenue A
40.727409
-73.981422
16587
Subscriber
1981.0
1
293
Lafayette St & E 8 St
24
813
2013-07-01 09:06:01
2013-07-01 09:19:34
293
Lafayette St & E 8 St
40.730286
-73.990768
79
Franklin St & W Broadway
40.719116
-74.006668
14943
Customer
NaN
0
293
Lafayette St & E 8 St
25
488
2013-07-01 09:16:12
2013-07-01 09:24:20
293
Lafayette St & E 8 St
40.730286
-73.990768
410
Suffolk St & Stanton St
40.720665
-73.985176
16821
Subscriber
1983.0
2
293
Lafayette St & E 8 St
26
663
2013-07-01 09:20:18
2013-07-01 09:31:21
293
Lafayette St & E 8 St
40.730286
-73.990768
419
Carlton Ave & Park Ave
40.695808
-73.973557
20289
Subscriber
1969.0
1
293
Lafayette St & E 8 St
27
582
2013-07-01 09:27:02
2013-07-01 09:36:44
293
Lafayette St & E 8 St
40.730286
-73.990768
377
6 Ave & Canal St
40.722439
-74.005661
15740
Subscriber
1954.0
2
293
Lafayette St & E 8 St
28
369
2013-07-01 09:34:34
2013-07-01 09:40:43
293
Lafayette St & E 8 St
40.730286
-73.990768
419
Carlton Ave & Park Ave
40.695808
-73.973557
17653
Subscriber
1985.0
2
293
Lafayette St & E 8 St
29
1443
2013-07-01 09:37:06
2013-07-01 10:01:09
293
Lafayette St & E 8 St
40.730286
-73.990768
363
West Thames St
40.708347
-74.017136
17753
Subscriber
1963.0
2
293
Lafayette St & E 8 St
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
819
849
2016-12-30 17:23:53
2016-12-30 17:38:03
2021
W 45 St & 8 Ave
40.759293
-73.988594
2017
E 43 St & 2 Ave
40.750225
-73.971214
21425
Subscriber
1971.0
1
2017
E 43 St & 2 Ave
820
176
2016-12-30 18:31:55
2016-12-30 18:34:51
516
E 47 St & 1 Ave
40.752068
-73.967842
2017
E 43 St & 2 Ave
40.750225
-73.971214
18189
Subscriber
1947.0
1
2017
E 43 St & 2 Ave
821
542
2016-12-30 18:53:13
2016-12-30 19:02:16
3134
3 Ave & E 62 St
40.763126
-73.965271
2017
E 43 St & 2 Ave
40.750225
-73.971214
26070
Subscriber
1984.0
1
2017
E 43 St & 2 Ave
822
644
2016-12-30 20:12:49
2016-12-30 20:23:33
507
E 25 St & 2 Ave
40.739124
-73.979736
2017
E 43 St & 2 Ave
40.750225
-73.971214
18769
Subscriber
1979.0
1
2017
E 43 St & 2 Ave
823
447
2016-12-30 20:15:16
2016-12-30 20:22:43
520
W 52 St & 5 Ave
40.759922
-73.976486
2017
E 43 St & 2 Ave
40.750225
-73.971214
15298
Subscriber
1993.0
1
2017
E 43 St & 2 Ave
824
631
2016-12-30 20:36:26
2016-12-30 20:46:58
3264
E 61 St & Park Ave
40.764210
-73.969101
2017
E 43 St & 2 Ave
40.750225
-73.971214
25940
Subscriber
1975.0
1
2017
E 43 St & 2 Ave
825
555
2016-12-30 20:38:28
2016-12-30 20:47:43
520
W 52 St & 5 Ave
40.759922
-73.976486
2017
E 43 St & 2 Ave
40.750225
-73.971214
26498
Subscriber
1981.0
2
2017
E 43 St & 2 Ave
826
599
2016-12-30 20:52:42
2016-12-30 21:02:41
476
E 31 St & 3 Ave
40.743942
-73.979660
2017
E 43 St & 2 Ave
40.750225
-73.971214
19728
Subscriber
1956.0
1
2017
E 43 St & 2 Ave
827
675
2016-12-30 20:52:45
2016-12-30 21:04:01
476
E 31 St & 3 Ave
40.743942
-73.979660
2017
E 43 St & 2 Ave
40.750225
-73.971214
23888
Subscriber
1956.0
2
2017
E 43 St & 2 Ave
828
658
2016-12-30 23:59:36
2016-12-31 00:10:35
497
E 17 St & Broadway
40.737049
-73.990089
2017
E 43 St & 2 Ave
40.750225
-73.971214
22829
Subscriber
1988.0
1
2017
E 43 St & 2 Ave
829
1301
2016-12-31 07:07:37
2016-12-31 07:29:18
422
W 59 St & 10 Ave
40.770512
-73.988037
2017
E 43 St & 2 Ave
40.750225
-73.971214
21634
Subscriber
1988.0
1
2017
E 43 St & 2 Ave
830
605
2016-12-31 09:59:15
2016-12-31 10:09:21
537
Lexington Ave & E 24 St
40.740257
-73.984093
2017
E 43 St & 2 Ave
40.750225
-73.971214
14816
Subscriber
1986.0
1
2017
E 43 St & 2 Ave
831
356
2016-12-31 18:31:10
2016-12-31 18:37:07
305
E 58 St & 3 Ave
40.760956
-73.967247
2017
E 43 St & 2 Ave
40.750225
-73.971214
16863
Subscriber
1986.0
1
2017
E 43 St & 2 Ave
832
320
2016-12-31 18:49:12
2016-12-31 18:54:33
454
E 51 St & 1 Ave
40.754559
-73.965927
2017
E 43 St & 2 Ave
40.750225
-73.971214
25544
Subscriber
1948.0
1
2017
E 43 St & 2 Ave
833
292
2016-12-29 10:46:47
2016-12-29 10:51:40
432
E 7 St & Avenue A
40.726219
-73.983795
3219
NYCBS Depot - STY
40.729195
-73.976654
17981
Subscriber
1991.0
1
3219
NYCBS Depot - STY
834
843
2016-12-29 18:48:21
2016-12-29 19:02:25
2010
Grand St & Greene St
40.721653
-74.002350
3219
NYCBS Depot - STY
40.729195
-73.976654
19636
Subscriber
1986.0
1
3219
NYCBS Depot - STY
835
489
2016-12-30 11:14:24
2016-12-30 11:22:33
475
E 16 St & Irving Pl
40.735241
-73.987587
3219
NYCBS Depot - STY
40.729195
-73.976654
25244
Subscriber
1961.0
1
3219
NYCBS Depot - STY
836
425
2016-12-30 14:47:24
2016-12-30 14:54:30
285
Broadway & E 14 St
40.734547
-73.990738
3219
NYCBS Depot - STY
40.729195
-73.976654
25347
Subscriber
1992.0
1
3219
NYCBS Depot - STY
837
10118
2016-12-30 17:34:29
2016-12-30 20:23:07
307
Canal St & Rutgers St
40.714275
-73.989899
3219
NYCBS Depot - STY
40.729195
-73.976654
19385
Customer
NaN
0
3219
NYCBS Depot - STY
838
364
2016-12-30 18:41:52
2016-12-30 18:47:57
504
1 Ave & E 16 St
40.732220
-73.981659
3219
NYCBS Depot - STY
40.729195
-73.976654
21322
Subscriber
1992.0
1
3219
NYCBS Depot - STY
846
774
2016-12-29 16:04:17
2016-12-29 16:17:12
500
Broadway & W 51 St
40.762287
-73.983360
3017
NYCBS Depot - FAR
40.751484
-73.996765
25002
Subscriber
1988.0
1
3017
NYCBS Depot - FAR
847
1767
2016-12-29 19:31:28
2016-12-29 20:00:55
3345
Madison Ave & E 99 St
40.789486
-73.952431
3237
21 St & 41 Ave
40.753834
-73.942680
26371
Subscriber
1963.0
1
3237
21 St & 41 Ave
848
896
2016-12-30 13:26:43
2016-12-30 13:41:39
3113
Greenpoint Ave & Manhattan Ave
40.730259
-73.953941
3237
21 St & 41 Ave
40.753834
-73.942680
25186
Subscriber
1978.0
1
3237
21 St & 41 Ave
849
253
2016-12-30 13:44:19
2016-12-30 13:48:32
3126
44 Dr & Jackson Ave
40.747181
-73.943260
3237
21 St & 41 Ave
40.753834
-73.942680
26703
Subscriber
1947.0
1
3237
21 St & 41 Ave
850
1754
2016-12-30 14:37:57
2016-12-30 15:07:11
402
Broadway & E 22 St
40.740345
-73.989548
3237
21 St & 41 Ave
40.753834
-73.942680
25947
Subscriber
1963.0
1
3237
21 St & 41 Ave
851
363
2016-12-31 01:29:36
2016-12-31 01:35:39
3121
Jackson Ave & 46 Rd
40.745247
-73.947334
3237
21 St & 41 Ave
40.753834
-73.942680
16471
Subscriber
1959.0
2
3237
21 St & 41 Ave
852
1580
2016-12-31 11:26:50
2016-12-31 11:53:11
3119
Vernon Blvd & 50 Ave
40.742329
-73.954117
3237
21 St & 41 Ave
40.753834
-73.942680
26738
Subscriber
1956.0
2
3237
21 St & 41 Ave
853
2590
2016-12-31 13:21:52
2016-12-31 14:05:02
261
Johnson St & Gold St
40.694748
-73.983627
3237
21 St & 41 Ave
40.753834
-73.942680
20954
Subscriber
1982.0
1
3237
21 St & 41 Ave
854
329
2016-12-31 13:27:26
2016-12-31 13:32:55
3126
44 Dr & Jackson Ave
40.747181
-73.943260
3237
21 St & 41 Ave
40.753834
-73.942680
25327
Subscriber
1996.0
2
3237
21 St & 41 Ave
855
1637
2016-12-30 08:38:12
2016-12-30 09:05:30
3319
14 St & 5 Ave
40.666286
-73.988953
255
NYCBS Depot - SSP
40.646767
-74.016510
24953
Subscriber
1988.0
1
255
NYCBS Depot - SSP
5521461 rows × 17 columns
In [56]:
defunct_start_trips_count = defunct_trips_all[['start_station_id', 'start_station_name', 'trip_duration']].groupby(['start_station_id', 'start_station_name']).count().sort_values('trip_duration',
ascending=False)
bad_ids = defunct_start_trips_count[defunct_start_trips_count.trip_duration < 50].reset_index()[['start_station_id']]
defunct_start_trips_count[defunct_start_trips_count.trip_duration < 50]
Out[56]:
trip_duration
start_station_id
start_station_name
3423
West Drive & Prospect Park West
49
3329
Degraw St & Smith St
48
3414
Bergen St & Flatbush Ave
47
3339
Berkeley Pl & 6 Ave
45
3438
E 76 St & 3 Ave
45
3363
E 102 St & Park Ave
43
3404
7 St & 5 Ave
43
3419
Douglass St & 4 Ave
42
3355
E 66 St & Madison Ave
42
3328
W 100 St & Manhattan Ave
40
3316
W 104 St & Amsterdam Ave
39
3366
West End Ave & W 107 St
39
3399
7 St & 3 Ave
38
3347
Van Brunt St & Wolcott St
38
3322
12 St & 4 Ave
38
3343
W 107 St & Columbus Ave
37
3354
3 St & Prospect Park West
36
3356
Amsterdam Ave & W 66 St
36
3310
14 St & 7 Ave
36
3130
21 St & Queens Plaza North
36
3319
14 St & 5 Ave
36
3317
10 St & 5 Ave
35
3358
Garfield Pl & 8 Ave
34
3390
E 109 St & 3 Ave
34
3365
3 St & 7 Ave
33
3346
Berkeley Pl & 7 Ave
31
3384
Smith St & 3 St
31
3250
NYCBS Depot - PIT
31
3413
Wyckoff St & 3 Ave
30
3351
E 102 St & 1 Ave
30
...
...
...
3300
Prospect Park West & 8 St
19
3370
78 St & 2 Ave
18
3340
Wolcott St & Dwight St
18
3239
Bressler
18
3327
3 Ave & E 100 St
17
3014
3969.TEMP (Bike The Branches - Central Branch)
16
3338
2 Ave & E 99 St
15
3398
Smith St & 9 St
14
3391
E 106 St & 1 Ave
14
3348
Coffey St & Conover St
14
3017
NYCBS Depot - FAR
13
3040
SSP Tech Workshop
12
3440
Fulton St & Adams St
12
3387
E 106 St & Madison Ave
12
3036
8D OPS 01
10
3342
Pioneer St & Richards St
10
3379
E 103 St & Lexington Ave
9
3405
5 St & 6 Ave
9
3266
Kiosk in a box Deployment
7
3393
Richards St & Delavan St
7
3333
Columbia St & Lorraine St
7
3395
Henry St & W 9 St
6
3424
E 106 St & Lexington Ave
6
3394
Columbia St & W 9 St
4
3330
Henry St & Bay St
3
3326
Clinton St & Centre St
3
3252
Bike The Branches - Red Hook Branch
2
3371
4 Ave & 2 St
1
3385
2 Ave & E 105 St
1
3240
NYCBS Depot BAL - DYR
1
77 rows × 1 columns
In [57]:
defunct_end_trips_count = defunct_trips_all[[
'end_station_id', 'end_station_name', 'trip_duration']].groupby(
['end_station_id', 'end_station_name']).count().sort_values('trip_duration', ascending=False)
bad_ids_2 = (
defunct_end_trips_count[defunct_end_trips_count.trip_duration < 50].reset_index()
)[['end_station_id']]
defunct_end_trips_count[defunct_end_trips_count.trip_duration < 50]
Out[57]:
trip_duration
end_station_id
end_station_name
3423
West Drive & Prospect Park West
49
3341
Central Park West & W 102 St
48
3414
Bergen St & Flatbush Ave
47
3283
W 89 St & Columbus Ave
44
3404
7 St & 5 Ave
43
3344
Pioneer St & Van Brunt St
39
3353
Reed St & Van Brunt St
37
3413
Wyckoff St & 3 Ave
37
3384
Smith St & 3 St
36
3407
Union St & Nevins St
36
3347
Van Brunt St & Wolcott St
35
3322
12 St & 4 Ave
35
3356
Amsterdam Ave & W 66 St
34
3351
E 102 St & 1 Ave
34
3317
10 St & 5 Ave
33
3304
6 Ave & 9 St
32
3438
E 76 St & 3 Ave
31
3320
Central Park West & W 100 St
30
3392
Commerce St & Van Brunt St
30
3319
14 St & 5 Ave
30
3310
14 St & 7 Ave
30
3345
Madison Ave & E 99 St
30
3403
4 Ave & 9 St
27
3316
W 104 St & Amsterdam Ave
27
3363
E 102 St & Park Ave
26
3425
2 Ave & E 104 St
25
3166
Riverside Dr & W 72 St
24
3434
W 88 St & West End Ave
23
3343
W 107 St & Columbus Ave
22
3309
E 97 St & 3 Ave
22
...
...
...
3354
3 St & Prospect Park West
15
3381
3 St & Hoyt St
15
3313
6 Ave & 12 St
15
3333
Columbia St & Lorraine St
14
3240
NYCBS Depot BAL - DYR
14
3401
2 Ave & 9 St
12
3326
Clinton St & Centre St
12
3358
Garfield Pl & 8 Ave
11
3393
Richards St & Delavan St
11
3036
8D OPS 01
10
3330
Henry St & Bay St
10
3327
3 Ave & E 100 St
10
3424
E 106 St & Lexington Ave
9
3040
SSP Tech Workshop
9
3405
5 St & 6 Ave
8
3346
Berkeley Pl & 7 Ave
7
3302
Columbus Ave & W 103 St
7
3252
Bike The Branches - Red Hook Branch
7
3379
E 103 St & Lexington Ave
7
3440
Fulton St & Adams St
7
3266
Kiosk in a box Deployment
6
3370
78 St & 2 Ave
6
3186
Grove St PATH
6
3338
2 Ave & E 99 St
5
3014
3969.TEMP (Bike The Branches - Central Branch)
3
3395
Henry St & W 9 St
3
3385
2 Ave & E 105 St
2
3394
Columbia St & W 9 St
1
3187
Warren St
1
3439
Broadway & E 22 St - Valet Scan
1
77 rows × 1 columns
In [68]:
bad_ids.columns =['id']
bad_ids_2.columns = ['id']
bad_ids = bad_ids.append(bad_ids_2).drop_duplicates().sort_values('id').reset_index(drop=True)
from IPython.display import display, HTML
display(HTML(bad_ids.T.to_html()))
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
id
3014
3017
3036
3040
3130
3166
3186
3187
3239
3240
3250
3252
3266
3283
3297
3300
3302
3304
3306
3309
3310
3313
3316
3317
3319
3320
3322
3326
3327
3328
3329
3330
3332
3333
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3351
3353
3354
3355
3356
3358
3363
3365
3366
3370
3371
3379
3381
3384
3385
3387
3390
3391
3392
3393
3394
3395
3398
3399
3401
3403
3404
3405
3407
3413
3414
3419
3421
3423
3424
3425
3432
3434
3438
3439
3440
In [73]:
bad_ids.id.T.values
Out[73]:
array([3014, 3017, 3036, 3040, 3130, 3166, 3186, 3187, 3239, 3240, 3250,
3252, 3266, 3283, 3297, 3300, 3302, 3304, 3306, 3309, 3310, 3313,
3316, 3317, 3319, 3320, 3322, 3326, 3327, 3328, 3329, 3330, 3332,
3333, 3338, 3339, 3340, 3341, 3342, 3343, 3344, 3345, 3346, 3347,
3348, 3351, 3353, 3354, 3355, 3356, 3358, 3363, 3365, 3366, 3370,
3371, 3379, 3381, 3384, 3385, 3387, 3390, 3391, 3392, 3393, 3394,
3395, 3398, 3399, 3401, 3403, 3404, 3405, 3407, 3413, 3414, 3419,
3421, 3423, 3424, 3425, 3432, 3434, 3438, 3439, 3440])
In [76]:
df2 = df.merge(bad_ids, left_on='start_station_id', right_on='id', how='left').merge(
bad_ids, left_on='end_station_id', right_on='id', how='left'
)
In [82]:
df3 = df2[df2.id_x.isnull() & df2.id_y.isnull()]
In [83]:
df3.start_station_id.compute().shape
Out[83]:
(36589081,)
In [81]:
df.start_station_id.compute().shape
Out[81]:
(36902025,)
In [ ]:
Content source: r-shekhar/NYC-transport
Similar notebooks: