In [2]:
import dask.distributed
import dask.dataframe as dd
import pandas as pd
import numpy as np

import urllib, json
import sklearn.neighbors

Set up a local cluster of Dask Distributed


In [3]:
client = dask.distributed.Client()

Read historical bike data, print head and tail


In [4]:
df = dd.read_parquet('/bigdata/citibike.parquet')

In [5]:
df.head()


Out[5]:
trip_duration start_time stop_time start_station_id start_station_name start_station_latitude start_station_longitude end_station_id end_station_name end_station_latitude end_station_longitude bike_id user_type birth_year gender
0 634 2013-07-01 00:00:00 2013-07-01 00:10:34 164 E 47 St & 2 Ave 40.753231 -73.970322 504 1 Ave & E 15 St 40.732220 -73.981659 16950 Customer NaN 0
1 1547 2013-07-01 00:00:02 2013-07-01 00:25:49 388 W 26 St & 10 Ave 40.749718 -74.002953 459 W 20 St & 11 Ave 40.746746 -74.007759 19816 Customer NaN 0
2 178 2013-07-01 00:01:04 2013-07-01 00:04:02 293 Lafayette St & E 8 St 40.730286 -73.990768 237 E 11 St & 2 Ave 40.730473 -73.986725 14548 Subscriber 1980.0 2
3 1580 2013-07-01 00:01:06 2013-07-01 00:27:26 531 Forsyth St & Broome St 40.718941 -73.992661 499 Broadway & W 60 St 40.769154 -73.981918 16063 Customer NaN 0
4 757 2013-07-01 00:01:10 2013-07-01 00:13:47 382 University Pl & E 14 St 40.734928 -73.992004 410 Suffolk St & Stanton St 40.720665 -73.985176 19213 Subscriber 1986.0 1

In [6]:
df.tail()


Out[6]:
trip_duration start_time stop_time start_station_id start_station_name start_station_latitude start_station_longitude end_station_id end_station_name end_station_latitude end_station_longitude bike_id user_type birth_year gender
51474 441 2016-12-31 23:56:15 2017-01-01 00:03:36 284 Greenwich Ave & 8 Ave 40.739017 -74.002640 336 Sullivan St & Washington Sq 40.730476 -73.999062 16185 Subscriber 1974.0 2
51475 1026 2016-12-31 23:56:19 2017-01-01 00:13:26 281 Grand Army Plaza & Central Park S 40.764397 -73.973717 3143 5 Ave & E 78 St 40.776829 -73.963890 18267 None 1983.0 1
51476 1747 2016-12-31 23:56:35 2017-01-01 00:25:43 3424 E 106 St & Lexington Ave 40.791977 -73.945992 3295 Central Park W & W 96 St 40.791271 -73.964836 19899 Subscriber 1970.0 2
51477 951 2016-12-31 23:59:31 2017-01-01 00:15:23 3158 W 63 St & Broadway 40.771637 -73.982613 3169 Riverside Dr & W 82 St 40.787209 -73.981277 16866 Subscriber 1961.0 1
51478 1322 2016-12-31 23:59:56 2017-01-01 00:21:58 3263 Cooper Square & E 7 St 40.729237 -73.990868 498 Broadway & W 32 St 40.748550 -73.988083 25793 Subscriber 1985.0 1

In [7]:
# Count rows
df.start_station_id.compute().shape


Out[7]:
(36902025,)

In [8]:
# Select start and end columns as separate dataframes, 
# apply range checks on lats and lons to eliminate corrupted rows

df_s = df[['start_station_id', 'start_station_name', 
           'start_station_latitude', 'start_station_longitude']]
df_e = df[['end_station_id', 'end_station_name', 
           'end_station_latitude', 'end_station_longitude']]

df_s = df_s[(df_s.start_station_latitude > 40.) & (df_s.start_station_latitude < 41.)]
df_s = df_s[(df_s.start_station_longitude + 74.0 > -0.25) & (df_s.start_station_longitude + 74.0 < 0.25)]

df_e = df_e[(df_e.end_station_latitude > 40.) & (df_e.end_station_latitude < 41.)]
df_e = df_e[(df_e.end_station_longitude + 74.0 > -0.25) & (df_e.end_station_longitude + 74.0 < 0.25)]

In [9]:
df_s.head()


Out[9]:
start_station_id start_station_name start_station_latitude start_station_longitude
0 164 E 47 St & 2 Ave 40.753231 -73.970322
1 388 W 26 St & 10 Ave 40.749718 -74.002953
2 293 Lafayette St & E 8 St 40.730286 -73.990768
3 531 Forsyth St & Broome St 40.718941 -73.992661
4 382 University Pl & E 14 St 40.734928 -73.992004

Take the start locations, end locations, calculate mean latitude and longitude, convert to pandas, rename columns and concatenate. Sort by id. This will contain duplicates due to numerical precision and due to actual duplication due to some other reason.


In [10]:
a1 = df_s.groupby(['start_station_id', 'start_station_name']).mean().compute()
a2 = df_e.groupby(['end_station_id', 'end_station_name']).mean().compute()

a1.index = a1.index.rename(['id', 'name'])
a1.columns = ['lat', 'lon']
a2.index = a2.index.rename(['id', 'name'])
a2.columns = ['lat', 'lon']

trip_points = a1.append(a2).drop_duplicates().reset_index()
trip_points = trip_points.sort_values('id').reset_index(drop=True)

del a1, a2
trip_points.head()


Out[10]:
id name lat lon
0 72 W 52 St & 11 Ave 40.767272 -73.993927
1 72 W 52 St & 11 Ave 40.767273 -73.993926
2 79 Franklin St & W Broadway 40.719118 -74.006668
3 79 Franklin St & W Broadway 40.719117 -74.006671
4 82 St James Pl & Pearl St 40.711174 -74.000166

In [11]:
trip_points.shape


Out[11]:
(1413, 4)

Read current data state from web

This will be the authoritative list of bike stations and their coordinates


In [12]:
# Uncomment these two lines to fetch latest file and rename it

# !wget https://feeds.citibikenyc.com/stations/stations.json
# !mv stations.json stations.`date +"%Y.%m.%d.%H.%M"`.json

In [13]:
df_cur = pd.DataFrame(
    json.loads(
        open('stations.2017.04.20.09.43.json', 'r').read()
    )['stationBeanList'])
df_cur.head()


Out[13]:
altitude availableBikes availableDocks city id landMark lastCommunicationTime latitude location longitude postalCode stAddress1 stAddress2 stationName statusKey statusValue testStation totalDocks
0 20 16 72 2017-04-20 09:43:27 AM 40.767272 -73.993929 W 52 St & 11 Ave W 52 St & 11 Ave 1 In Service False 39
1 12 20 79 2017-04-20 09:42:54 AM 40.719116 -74.006667 Franklin St & W Broadway Franklin St & W Broadway 1 In Service False 33
2 0 0 82 2017-04-13 12:05:52 PM 40.711174 -74.000165 St James Pl & Pearl St St James Pl & Pearl St 3 Not In Service False 0
3 35 24 83 2017-04-20 09:42:19 AM 40.683826 -73.976323 Atlantic Ave & Fort Greene Pl Atlantic Ave & Fort Greene Pl 1 In Service False 62
4 5 34 116 2017-04-20 09:42:20 AM 40.741776 -74.001497 W 17 St & 8 Ave W 17 St & 8 Ave 1 In Service False 39

In [14]:
df_cur = df_cur[['id', 'stationName', 'latitude', 'longitude']].sort_values('id')
df_cur.head()


Out[14]:
id stationName latitude longitude
0 72 W 52 St & 11 Ave 40.767272 -73.993929
1 79 Franklin St & W Broadway 40.719116 -74.006667
2 82 St James Pl & Pearl St 40.711174 -74.000165
3 83 Atlantic Ave & Fort Greene Pl 40.683826 -73.976323
4 116 W 17 St & 8 Ave 40.741776 -74.001497

In [15]:
# We see this has less than half the rows of trip_points
print(df_cur.shape)


(664, 4)

Do a spatial nearest neighbor search

Construct a BallTree using the actual correct coordinate data from the JSON feed. Use that to lookup the locations in trip_points.


In [16]:
bt = sklearn.neighbors.BallTree(df_cur[['latitude', 'longitude']].values, metric='euclidean')

In [17]:
np.cos(np.deg2rad(40.7))


Out[17]:
0.75813433619765225

25 meters is approximately 0.000225˚ (not accounting for latitude) latitude will give a factor of cos(41˚N)=0.76 at 41˚N. So we are really testing if the point lies within an ellipse with a radius of 25.6 meters in latitude and 25.6*0.75=19.2 meters in longitude. Should be good enough.


In [18]:
0.000225*111194.9 # 111.1949 km per degree latitude


Out[18]:
25.018852499999998

In [19]:
query_results = bt.query_radius(trip_points.ix[:, 2:], 0.000225)

The value of each row indicates the row in df_cur that matches each row in trip_points A zero for the first and second rows indicates that those rows in trip points match with the zeroth row in df_cur.


In [20]:
results_list = [[int(y) for y in x.tolist()] for x in query_results]

print(len(results_list))
results_list


1413
Out[20]:
[[0],
 [0],
 [1],
 [1],
 [2],
 [2],
 [3],
 [3],
 [4],
 [4],
 [5],
 [5],
 [6],
 [6],
 [7],
 [7],
 [8],
 [8],
 [9],
 [9],
 [10],
 [10],
 [11],
 [11],
 [12],
 [12],
 [13],
 [13],
 [14],
 [14],
 [15],
 [15],
 [16],
 [16],
 [17],
 [17],
 [18],
 [18],
 [],
 [],
 [19],
 [19],
 [20],
 [20],
 [21],
 [21],
 [22],
 [22],
 [23],
 [23],
 [24],
 [24],
 [25],
 [25],
 [26],
 [26],
 [27],
 [27],
 [28],
 [28],
 [],
 [],
 [29],
 [29],
 [30],
 [30],
 [31],
 [31],
 [32],
 [32],
 [33],
 [33],
 [34],
 [34],
 [651],
 [],
 [651],
 [],
 [35],
 [35],
 [36],
 [36],
 [37],
 [37],
 [38],
 [38],
 [39],
 [39],
 [],
 [40],
 [40],
 [],
 [],
 [],
 [42],
 [42],
 [43],
 [43],
 [44],
 [44],
 [45],
 [45],
 [46],
 [46],
 [],
 [],
 [],
 [],
 [47],
 [47],
 [48],
 [48],
 [49],
 [49],
 [50],
 [50],
 [],
 [],
 [51],
 [51],
 [52],
 [52],
 [53],
 [53],
 [54],
 [54],
 [55],
 [55],
 [56],
 [56],
 [],
 [],
 [57],
 [57],
 [58],
 [58],
 [59],
 [59],
 [60],
 [60],
 [61],
 [61],
 [62],
 [62],
 [],
 [],
 [63],
 [63],
 [64],
 [64],
 [65],
 [65],
 [66],
 [66],
 [],
 [67],
 [67],
 [67],
 [67],
 [],
 [68],
 [68],
 [69],
 [69],
 [],
 [],
 [71],
 [71],
 [72],
 [72],
 [73],
 [73],
 [],
 [],
 [74],
 [74],
 [],
 [],
 [],
 [],
 [76],
 [76],
 [77],
 [77],
 [78],
 [78],
 [],
 [],
 [],
 [],
 [],
 [],
 [79],
 [79],
 [80],
 [80],
 [81],
 [81],
 [82],
 [82],
 [83],
 [83],
 [84],
 [84],
 [85],
 [85],
 [86],
 [86],
 [87],
 [87],
 [88],
 [88],
 [89],
 [89],
 [90],
 [90],
 [90],
 [90],
 [91],
 [91],
 [92],
 [92],
 [],
 [],
 [93],
 [93],
 [94],
 [94],
 [95],
 [95],
 [],
 [],
 [96],
 [],
 [],
 [96],
 [],
 [97],
 [97],
 [],
 [98],
 [98],
 [99],
 [99],
 [100],
 [100],
 [101],
 [101],
 [102],
 [102],
 [103],
 [103],
 [104],
 [104],
 [105],
 [105],
 [],
 [],
 [],
 [],
 [106],
 [106],
 [107],
 [107],
 [108],
 [108],
 [109],
 [109],
 [110],
 [110],
 [111],
 [111],
 [112],
 [112],
 [113],
 [113],
 [114],
 [114],
 [115],
 [115],
 [116],
 [116],
 [117],
 [117],
 [118],
 [118],
 [119],
 [119],
 [120],
 [120],
 [],
 [121],
 [121],
 [],
 [122],
 [122],
 [123],
 [123],
 [124],
 [124],
 [125],
 [125],
 [126],
 [126],
 [127],
 [127],
 [128],
 [128],
 [129],
 [129],
 [130],
 [130],
 [131],
 [131],
 [132],
 [132],
 [133],
 [133],
 [134],
 [134],
 [135],
 [135],
 [136],
 [136],
 [137],
 [137],
 [138],
 [138],
 [139],
 [139],
 [140],
 [140],
 [],
 [],
 [141],
 [141],
 [142],
 [142],
 [143],
 [143],
 [144],
 [144],
 [],
 [],
 [],
 [],
 [145],
 [145],
 [146],
 [146],
 [147],
 [147],
 [148],
 [148],
 [149],
 [149],
 [150],
 [150],
 [151],
 [151],
 [],
 [],
 [152],
 [152],
 [153],
 [153],
 [154],
 [154],
 [155],
 [155],
 [156],
 [156],
 [157],
 [157],
 [158],
 [158],
 [159],
 [159],
 [160],
 [160],
 [161],
 [161],
 [162],
 [162],
 [163],
 [163],
 [164],
 [164],
 [165],
 [165],
 [166],
 [166],
 [167],
 [167],
 [168],
 [168],
 [169],
 [169],
 [170],
 [170],
 [],
 [],
 [171],
 [171],
 [172],
 [172],
 [173],
 [173],
 [174],
 [174],
 [175],
 [175],
 [176],
 [176],
 [177],
 [177],
 [178],
 [178],
 [179],
 [179],
 [180],
 [180],
 [181],
 [181],
 [182],
 [182],
 [183],
 [183],
 [184],
 [184],
 [185],
 [185],
 [186],
 [186],
 [187],
 [187],
 [188],
 [188],
 [189],
 [189],
 [190],
 [],
 [190],
 [],
 [191],
 [191],
 [192],
 [192],
 [],
 [],
 [193],
 [193],
 [194],
 [194],
 [195],
 [195],
 [196],
 [196],
 [197],
 [197],
 [198],
 [198],
 [199],
 [199],
 [200],
 [200],
 [201],
 [201],
 [202],
 [202],
 [203],
 [203],
 [204],
 [204],
 [204],
 [204],
 [205],
 [205],
 [206],
 [206],
 [207],
 [207],
 [208],
 [208],
 [209],
 [209],
 [210],
 [210],
 [211],
 [211],
 [212],
 [212],
 [213],
 [213],
 [214],
 [214],
 [215],
 [215],
 [216],
 [216],
 [217],
 [217],
 [218],
 [218],
 [219],
 [219],
 [220],
 [220],
 [221],
 [221],
 [],
 [],
 [],
 [],
 [222],
 [222],
 [223],
 [223],
 [224],
 [224],
 [225],
 [225],
 [226],
 [226],
 [227],
 [227],
 [228],
 [228],
 [229],
 [229],
 [230],
 [230],
 [231],
 [231],
 [],
 [],
 [232],
 [232],
 [233],
 [233],
 [234],
 [234],
 [235],
 [235],
 [236],
 [236],
 [237],
 [237],
 [238],
 [238],
 [239],
 [239],
 [240],
 [240],
 [241],
 [241],
 [242],
 [242],
 [243],
 [243],
 [244],
 [244],
 [],
 [],
 [245],
 [245],
 [246],
 [246],
 [247],
 [247],
 [248],
 [248],
 [249],
 [249],
 [250],
 [250],
 [251],
 [251],
 [252],
 [252],
 [253],
 [253],
 [254],
 [254],
 [255],
 [255],
 [256],
 [256],
 [257],
 [257],
 [258],
 [258],
 [259],
 [259],
 [259],
 [259],
 [260],
 [260],
 [261],
 [261],
 [262],
 [262],
 [263],
 [263],
 [],
 [],
 [264],
 [264],
 [],
 [],
 [265],
 [265],
 [266],
 [266],
 [267],
 [267],
 [268],
 [268],
 [269],
 [475],
 [269],
 [269],
 [475],
 [269],
 [270],
 [270],
 [271],
 [271],
 [271],
 [],
 [],
 [271],
 [272],
 [272],
 [],
 [],
 [483],
 [483],
 [273],
 [273],
 [274],
 [274],
 [275],
 [275],
 [276],
 [276],
 [277],
 [277],
 [],
 [278],
 [278],
 [],
 [279],
 [279],
 [280],
 [280],
 [281],
 [281],
 [282],
 [282],
 [283],
 [283],
 [284],
 [284],
 [285],
 [285],
 [286],
 [286],
 [287],
 [287],
 [],
 [],
 [],
 [],
 [288],
 [288],
 [],
 [289],
 [],
 [289],
 [290],
 [290],
 [291],
 [291],
 [292],
 [292],
 [],
 [293],
 [293],
 [],
 [294],
 [294],
 [295],
 [295],
 [296],
 [296],
 [297],
 [297],
 [298],
 [298],
 [299],
 [299],
 [300],
 [300],
 [301],
 [301],
 [302],
 [302],
 [],
 [],
 [303],
 [303],
 [],
 [],
 [304],
 [304],
 [305],
 [305],
 [306],
 [306],
 [],
 [],
 [],
 [],
 [],
 [307],
 [307],
 [],
 [],
 [],
 [],
 [],
 [476],
 [],
 [],
 [308],
 [308],
 [309],
 [309],
 [310],
 [310],
 [311],
 [311],
 [312],
 [312],
 [313],
 [313],
 [314],
 [314],
 [315],
 [315],
 [316],
 [316],
 [317],
 [317],
 [318],
 [318],
 [319],
 [319],
 [320],
 [320],
 [321],
 [321],
 [322],
 [322],
 [323],
 [323],
 [324],
 [324],
 [325],
 [325],
 [326],
 [326],
 [327],
 [327],
 [328],
 [328],
 [329],
 [329],
 [330],
 [330],
 [331],
 [331],
 [332],
 [332],
 [333],
 [333],
 [334],
 [334],
 [335],
 [335],
 [336],
 [336],
 [337],
 [337],
 [338],
 [338],
 [339],
 [339],
 [340],
 [340],
 [341],
 [341],
 [342],
 [342],
 [343],
 [343],
 [],
 [],
 [344],
 [344],
 [345],
 [345],
 [346],
 [346],
 [347],
 [347],
 [],
 [],
 [348],
 [348],
 [349],
 [349],
 [350],
 [350],
 [351],
 [351],
 [],
 [],
 [352],
 [352],
 [353],
 [353],
 [354],
 [354],
 [355],
 [355],
 [356],
 [356],
 [357],
 [357],
 [358],
 [358],
 [],
 [],
 [],
 [],
 [],
 [],
 [359],
 [359],
 [360],
 [360],
 [361],
 [361],
 [362],
 [362],
 [],
 [],
 [],
 [],
 [363],
 [363],
 [364],
 [364],
 [365],
 [365],
 [366],
 [366],
 [367],
 [367],
 [368],
 [368],
 [369],
 [369],
 [370],
 [370],
 [371],
 [371],
 [372],
 [372],
 [373],
 [373],
 [374],
 [374],
 [375],
 [375],
 [376],
 [376],
 [377],
 [377],
 [378],
 [378],
 [378],
 [378],
 [379],
 [379],
 [380],
 [380],
 [381],
 [381],
 [382],
 [382],
 [383],
 [383],
 [384],
 [384],
 [385],
 [385],
 [386],
 [386],
 [387],
 [387],
 [388],
 [388],
 [389],
 [389],
 [390],
 [390],
 [],
 [],
 [],
 [],
 [391],
 [391],
 [392],
 [392],
 [393],
 [393],
 [394],
 [394],
 [],
 [],
 [395],
 [395],
 [396],
 [396],
 [397],
 [397],
 [398],
 [398],
 [399],
 [399],
 [400],
 [400],
 [401],
 [401],
 [402],
 [402],
 [403],
 [403],
 [404],
 [404],
 [],
 [],
 [405],
 [405],
 [406],
 [406],
 [407],
 [407],
 [],
 [],
 [],
 [],
 [408],
 [408],
 [409],
 [409],
 [410],
 [410],
 [411],
 [411],
 [412],
 [412],
 [413],
 [413],
 [414],
 [414],
 [415],
 [415],
 [416],
 [416],
 [417],
 [417],
 [418],
 [418],
 [419],
 [419],
 [419],
 [419],
 [420],
 [420],
 [421],
 [421],
 [422],
 [422],
 [423],
 [423],
 [424],
 ...]

This shows for each row in trip points, there are zero or one matches, indicating 25 meters was an appropriate choice. At 50 meters, some rows have more than one match.


In [21]:
max((len(x) for x in results_list))


Out[21]:
1

In [22]:
min((len(x) for x in results_list))


Out[22]:
0

Merge spatial query results with trip_points

Historical trip data with can be compared to the current station list.


In [23]:
# nan if no match, else row_id in df_cur
trip_points['match_id'] = np.array([np.float64(x) 
                                    if x.shape[0] > 0 else np.nan
                                    for x in query_results.T])

In [24]:
trip_points.head()


Out[24]:
id name lat lon match_id
0 72 W 52 St & 11 Ave 40.767272 -73.993927 0.0
1 72 W 52 St & 11 Ave 40.767273 -73.993926 0.0
2 79 Franklin St & W Broadway 40.719118 -74.006668 1.0
3 79 Franklin St & W Broadway 40.719117 -74.006671 1.0
4 82 St James Pl & Pearl St 40.711174 -74.000166 2.0

In [25]:
df_cur['id'] = df_cur['id'].astype(np.float64)

In [26]:
lookup_table = trip_points.merge(df_cur, left_on='match_id', right_index=True, how='left')
lookup_table


Out[26]:
id_x name lat lon match_id id_y stationName latitude longitude
0 72 W 52 St & 11 Ave 40.767272 -73.993927 0.0 72.0 W 52 St & 11 Ave 40.767272 -73.993929
1 72 W 52 St & 11 Ave 40.767273 -73.993926 0.0 72.0 W 52 St & 11 Ave 40.767272 -73.993929
2 79 Franklin St & W Broadway 40.719118 -74.006668 1.0 79.0 Franklin St & W Broadway 40.719116 -74.006667
3 79 Franklin St & W Broadway 40.719117 -74.006671 1.0 79.0 Franklin St & W Broadway 40.719116 -74.006667
4 82 St James Pl & Pearl St 40.711174 -74.000166 2.0 82.0 St James Pl & Pearl St 40.711174 -74.000165
5 82 St James Pl & Pearl St 40.711174 -74.000167 2.0 82.0 St James Pl & Pearl St 40.711174 -74.000165
6 83 Atlantic Ave & Fort Greene Pl 40.683826 -73.976325 3.0 83.0 Atlantic Ave & Fort Greene Pl 40.683826 -73.976323
7 83 Atlantic Ave & Fort Greene Pl 40.683826 -73.976322 3.0 83.0 Atlantic Ave & Fort Greene Pl 40.683826 -73.976323
8 116 W 17 St & 8 Ave 40.741777 -74.001498 4.0 116.0 W 17 St & 8 Ave 40.741776 -74.001497
9 116 W 17 St & 8 Ave 40.741776 -74.001493 4.0 116.0 W 17 St & 8 Ave 40.741776 -74.001497
10 119 Park Ave & St Edwards St 40.696094 -73.978033 5.0 119.0 Park Ave & St Edwards St 40.696089 -73.978034
11 119 Park Ave & St Edwards St 40.696093 -73.978030 5.0 119.0 Park Ave & St Edwards St 40.696089 -73.978034
12 120 Lexington Ave & Classon Ave 40.686766 -73.959280 6.0 120.0 Lexington Ave & Classon Ave 40.686768 -73.959282
13 120 Lexington Ave & Classon Ave 40.686768 -73.959284 6.0 120.0 Lexington Ave & Classon Ave 40.686768 -73.959282
14 127 Barrow St & Hudson St 40.731725 -74.006747 7.0 127.0 Barrow St & Hudson St 40.731724 -74.006744
15 127 Barrow St & Hudson St 40.731723 -74.006745 7.0 127.0 Barrow St & Hudson St 40.731724 -74.006744
16 128 MacDougal St & Prince St 40.727104 -74.002966 8.0 128.0 MacDougal St & Prince St 40.727103 -74.002971
17 128 MacDougal St & Prince St 40.727104 -74.002966 8.0 128.0 MacDougal St & Prince St 40.727103 -74.002971
18 137 E 56 St & Madison Ave 40.761628 -73.972924 9.0 137.0 E 56 St & Madison Ave 40.761628 -73.972924
19 137 E 56 St & Madison Ave 40.761625 -73.972924 9.0 137.0 E 56 St & Madison Ave 40.761628 -73.972924
20 143 Clinton St & Joralemon St 40.692394 -73.993375 10.0 143.0 Clinton St & Joralemon St 40.692395 -73.993379
21 143 Clinton St & Joralemon St 40.692393 -73.993375 10.0 143.0 Clinton St & Joralemon St 40.692395 -73.993379
22 144 Nassau St & Navy St 40.698401 -73.980691 11.0 144.0 Nassau St & Navy St 40.698399 -73.980689
23 144 Nassau St & Navy St 40.698402 -73.980690 11.0 144.0 Nassau St & Navy St 40.698399 -73.980689
24 146 Hudson St & Reade St 40.716253 -74.009111 12.0 146.0 Hudson St & Reade St 40.716250 -74.009106
25 146 Hudson St & Reade St 40.716250 -74.009108 12.0 146.0 Hudson St & Reade St 40.716250 -74.009106
26 147 Greenwich St & Warren St 40.715423 -74.011220 13.0 147.0 Greenwich St & Warren St 40.715422 -74.011220
27 147 Greenwich St & Warren St 40.715424 -74.011226 13.0 147.0 Greenwich St & Warren St 40.715422 -74.011220
28 150 E 2 St & Avenue C 40.720874 -73.980854 14.0 150.0 E 2 St & Avenue C 40.720874 -73.980858
29 150 E 2 St & Avenue C 40.720875 -73.980855 14.0 150.0 E 2 St & Avenue C 40.720874 -73.980858
... ... ... ... ... ... ... ... ... ...
1383 3422 Columbia St & Degraw St 40.685927 -74.002429 638.0 3422.0 Columbia St & Degraw St 40.685930 -74.002424
1384 3423 West Drive & Prospect Park West 40.661064 -73.979458 639.0 3423.0 West Drive & Prospect Park West 40.661063 -73.979453
1385 3423 West Drive & Prospect Park West 40.661066 -73.979451 639.0 3423.0 West Drive & Prospect Park West 40.661063 -73.979453
1386 3424 E 106 St & Lexington Ave 40.791978 -73.945991 640.0 3424.0 E 106 St & Lexington Ave 40.791976 -73.945993
1387 3424 E 106 St & Lexington Ave 40.791976 -73.945994 640.0 3424.0 E 106 St & Lexington Ave 40.791976 -73.945993
1388 3425 2 Ave & E 104 St 40.789212 -73.943714 641.0 3425.0 2 Ave & E 104 St 40.789211 -73.943708
1389 3425 2 Ave & E 104 St 40.789211 -73.943708 641.0 3425.0 2 Ave & E 104 St 40.789211 -73.943708
1390 3427 Lafayette St & Jersey St 40.724303 -73.996004 642.0 3427.0 Lafayette St & Jersey St 40.724305 -73.996010
1391 3427 Lafayette St & Jersey St 40.724304 -73.996006 642.0 3427.0 Lafayette St & Jersey St 40.724305 -73.996010
1392 3428 8 Ave & W 16 St 40.740986 -74.001695 643.0 3428.0 8 Ave & W 16 St 40.740983 -74.001702
1393 3428 8 Ave & W 16 St 40.740985 -74.001704 643.0 3428.0 8 Ave & W 16 St 40.740983 -74.001702
1394 3429 Hanson Pl & Ashland Pl 40.685067 -73.977907 644.0 3429.0 Hanson Pl & Ashland Pl 40.685068 -73.977908
1395 3429 Hanson Pl & Ashland Pl 40.685065 -73.977906 644.0 3429.0 Hanson Pl & Ashland Pl 40.685068 -73.977908
1396 3430 Richardson St & N Henry St 40.719078 -73.942233 645.0 3430.0 Richardson St & N Henry St 40.719079 -73.942237
1397 3430 Richardson St & N Henry St 40.719081 -73.942231 645.0 3430.0 Richardson St & N Henry St 40.719079 -73.942237
1398 3431 E 35 St & 3 Ave 40.746526 -73.977886 646.0 3431.0 E 35 St & 3 Ave 40.746524 -73.977885
1399 3431 E 35 St & 3 Ave 40.746523 -73.977879 646.0 3431.0 E 35 St & 3 Ave 40.746524 -73.977885
1400 3432 Bike in Movie Night | Prospect Park Bandshell 40.662987 -73.976919 NaN NaN NaN NaN NaN
1401 3432 Bike in Movie Night | Prospect Park Bandshell 40.662988 -73.976922 NaN NaN NaN NaN NaN
1402 3434 W 88 St & West End Ave 40.790251 -73.977181 647.0 3434.0 W 88 St & West End Ave 40.790254 -73.977183
1403 3434 W 88 St & West End Ave 40.790254 -73.977179 647.0 3434.0 W 88 St & West End Ave 40.790254 -73.977183
1404 3435 Grand St & Elizabeth St 40.718822 -73.995959 648.0 3435.0 Grand St & Elizabeth St 40.718822 -73.995960
1405 3435 Grand St & Elizabeth St 40.718820 -73.995958 648.0 3435.0 Grand St & Elizabeth St 40.718822 -73.995960
1406 3436 Greenwich St & Hubert St 40.721319 -74.010065 649.0 3436.0 Greenwich St & Hubert St 40.721319 -74.010065
1407 3436 Greenwich St & Hubert St 40.721318 -74.010060 649.0 3436.0 Greenwich St & Hubert St 40.721319 -74.010065
1408 3438 E 76 St & 3 Ave 40.772249 -73.958419 650.0 3438.0 E 76 St & 3 Ave 40.772249 -73.958421
1409 3438 E 76 St & 3 Ave 40.772249 -73.958419 650.0 3438.0 E 76 St & 3 Ave 40.772249 -73.958421
1410 3439 Broadway & E 22 St - Valet Scan 40.740345 -73.989548 169.0 402.0 Broadway & E 22 St 40.740343 -73.989551
1411 3440 Fulton St & Adams St 40.692416 -73.989497 651.0 3440.0 Fulton St & Adams St 40.692418 -73.989495
1412 3440 Fulton St & Adams St 40.692417 -73.989502 651.0 3440.0 Fulton St & Adams St 40.692418 -73.989495

1413 rows × 9 columns

The above table shows rows with NaN. These stations have no current counterpart.


In [27]:
defunct_stations = lookup_table[np.isnan(lookup_table.match_id)]
defunct_stations


Out[27]:
id_x name lat lon match_id id_y stationName latitude longitude
38 160 E 37 St & Lexington Ave 40.748195 -73.978309 NaN NaN NaN NaN NaN
39 160 E 37 St & Lexington Ave 40.748192 -73.978310 NaN NaN NaN NaN NaN
60 218 Gallatin Pl & Livingston St 40.690286 -73.987071 NaN NaN NaN NaN NaN
61 218 Gallatin Pl & Livingston St 40.690285 -73.987069 NaN NaN NaN NaN NaN
75 233 Cadman Plaza W & Pierrepont St 40.694757 -73.990522 NaN NaN NaN NaN NaN
77 233 Cadman Plaza W & Pierrepont St 40.694754 -73.990530 NaN NaN NaN NaN NaN
88 242 Flushing Ave & Carlton Ave 40.697882 -73.973503 NaN NaN NaN NaN NaN
91 242 Flushing Ave & Carlton Ave 40.697882 -73.973503 NaN NaN NaN NaN NaN
92 243 Fulton St & Rockwell Pl 40.688151 -73.979105 NaN NaN NaN NaN NaN
93 243 Fulton St & Rockwell Pl 40.688129 -73.979028 NaN NaN NaN NaN NaN
104 250 Lafayette St & Jersey St 40.724560 -73.995651 NaN NaN NaN NaN NaN
105 250 Lafayette St & Jersey St 40.724557 -73.995650 NaN NaN NaN NaN NaN
106 250 Lafayette St & Jersey St N 40.724560 -73.995653 NaN NaN NaN NaN NaN
107 250 Lafayette St & Jersey St N 40.724559 -73.995649 NaN NaN NaN NaN NaN
116 255 NYCBS Depot - SSP 40.646765 -74.016513 NaN NaN NaN NaN NaN
117 255 NYCBS Depot - SSP 40.646766 -74.016510 NaN NaN NaN NaN NaN
130 263 Elizabeth St & Hester St 40.717329 -73.996356 NaN NaN NaN NaN NaN
131 263 Elizabeth St & Hester St 40.717329 -73.996351 NaN NaN NaN NaN NaN
144 271 Ashland Pl & Hanson Pl 40.685276 -73.978059 NaN NaN NaN NaN NaN
145 271 Ashland Pl & Hanson Pl 40.685281 -73.978057 NaN NaN NaN NaN NaN
154 279 Sands St & Gold St 40.699870 -73.982716 NaN NaN NaN NaN NaN
159 279 Sands St & Gold St 40.699870 -73.982720 NaN NaN NaN NaN NaN
164 282 Kent Ave & S 11 St 40.708024 -73.968367 NaN NaN NaN NaN NaN
165 282 Kent Ave & S 11 St 40.708025 -73.968372 NaN NaN NaN NaN NaN
172 290 2 Ave & E 58 St 40.760204 -73.964786 NaN NaN NaN NaN NaN
173 290 2 Ave & E 58 St 40.760206 -73.964780 NaN NaN NaN NaN NaN
176 293 Lafayette St & E 8 St 40.730284 -73.990770 NaN NaN NaN NaN NaN
177 293 Lafayette St & E 8 St 40.730285 -73.990771 NaN NaN NaN NaN NaN
178 294 Washington Square E 40.730497 -73.995719 NaN NaN NaN NaN NaN
179 294 Washington Square E 40.730495 -73.995721 NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ...
1053 3229 Marcy Ave & MacDonough St 40.680620 -73.946245 NaN NaN NaN NaN NaN
1054 3230 Penn Station Valet 40.751282 -73.996927 NaN NaN NaN NaN NaN
1055 3230 Penn Station Valet 40.751279 -73.996926 NaN NaN NaN NaN NaN
1062 3234 E 40 St & Madison Ave 40.751593 -73.980427 NaN NaN NaN NaN NaN
1063 3234 E 40 St & Madison Ave 40.751592 -73.980430 NaN NaN NaN NaN NaN
1070 3237 21 St & 41 Ave 40.753832 -73.942681 NaN NaN NaN NaN NaN
1071 3237 21 St & 41 Ave 40.753833 -73.942676 NaN NaN NaN NaN NaN
1074 3239 Bressler 40.646539 -74.016588 NaN NaN NaN NaN NaN
1083 3245 Kiosk in a box Motivate 40.646677 -74.016298 NaN NaN NaN NaN NaN
1084 3246 Montague St & Clinton St 40.694282 -73.992300 NaN NaN NaN NaN NaN
1085 3246 Montague St & Clinton St 40.694284 -73.992301 NaN NaN NaN NaN NaN
1088 3250 NYCBS Depot - PIT 40.716912 -73.983843 NaN NaN NaN NaN NaN
1089 3250 NYCBS Depot - PIT 40.716913 -73.983843 NaN NaN NaN NaN NaN
1092 3253 Bike The Branches - Central Branch 40.672804 -73.968868 NaN NaN NaN NaN NaN
1093 3253 Bike The Branches - Central Branch 40.672806 -73.968864 NaN NaN NaN NaN NaN
1094 3254 Soissons Landing 40.692320 -74.014862 NaN NaN NaN NaN NaN
1095 3254 Soissons Landing 40.692319 -74.014863 NaN NaN NaN NaN NaN
1100 3257 Adventures NYC 40.773769 -73.971169 NaN NaN NaN NaN NaN
1101 3257 Adventures NYC 40.773771 -73.971169 NaN NaN NaN NaN NaN
1110 3264 E 61 St & Park Ave 40.764207 -73.969101 NaN NaN NaN NaN NaN
1111 3264 E 61 St & Park Ave 40.764210 -73.969101 NaN NaN NaN NaN NaN
1112 3265 E 2 St & 2 Ave 40.724565 -73.989439 NaN NaN NaN NaN NaN
1113 3265 E 2 St & 2 Ave 40.724564 -73.989439 NaN NaN NaN NaN NaN
1114 3266 Kiosk in a box Deployment 40.708611 -73.928507 NaN NaN NaN NaN NaN
1115 3266 Kiosk in a box Deployment 40.708609 -73.928510 NaN NaN NaN NaN NaN
1138 3291 Riverside Dr & W 91 St 40.793373 -73.976851 NaN NaN NaN NaN NaN
1139 3291 Riverside Dr & W 91 St 40.793372 -73.976853 NaN NaN NaN NaN NaN
1311 3385 2 Ave & E 105 St 40.789818 -73.942963 NaN NaN NaN NaN NaN
1400 3432 Bike in Movie Night | Prospect Park Bandshell 40.662987 -73.976919 NaN NaN NaN NaN NaN
1401 3432 Bike in Movie Night | Prospect Park Bandshell 40.662988 -73.976922 NaN NaN NaN NaN NaN

174 rows × 9 columns


In [28]:
defunct_stations = defunct_stations[['id_x', 'name']].drop_duplicates()
defunct_stations


Out[28]:
id_x name
38 160 E 37 St & Lexington Ave
60 218 Gallatin Pl & Livingston St
75 233 Cadman Plaza W & Pierrepont St
88 242 Flushing Ave & Carlton Ave
92 243 Fulton St & Rockwell Pl
104 250 Lafayette St & Jersey St
106 250 Lafayette St & Jersey St N
116 255 NYCBS Depot - SSP
130 263 Elizabeth St & Hester St
144 271 Ashland Pl & Hanson Pl
154 279 Sands St & Gold St
164 282 Kent Ave & S 11 St
172 290 2 Ave & E 58 St
176 293 Lafayette St & E 8 St
178 294 Washington Square E
186 298 3 Ave & Schermerhorn St
188 300 Shevchenko Pl & E 7 St
189 300 Shevchenko Pl & E 6 St
222 314 Montague St & Clinton St
230 318 E 43 St & Vanderbilt Ave
233 319 Park Pl & Church St
236 320 Church St & Leonard St
256 329 Greenwich St & N Moore St
258 329 Greenwich St & North Moore St
290 347 W Houston St & Hudson St
332 367 E 53 St & Lexington Ave
342 375 Mercer St & Bleecker St
343 375 Mercer St & Bleecker St S
360 384 Fulton St & Waverly Ave
400 404 9 Ave & W 14 St
... ... ...
857 3104 KentAve&N7 St
858 3104 Kent Ave & N 7 St
919 3133 E 67 St & Lexington Ave
920 3133 E 67 St & Park Ave
931 3138 E 77 St & Park Ave
953 3149 E 82 St & 2 Ave
961 3153 E 71 St & 2 Ave
963 3154 E 77 St & 3 Ave
1017 3181 Soissons Landing
1019 3182 Yankee Ferry Terminal
1024 3186 Grove St PATH
1038 3219 NYCBS Depot - STY
1043 3222 Hanson Pl & St Felix St
1052 3229 Marcy Ave & MacDonough St
1054 3230 Penn Station Valet
1062 3234 E 40 St & Madison Ave
1070 3237 21 St & 41 Ave
1074 3239 Bressler
1083 3245 Kiosk in a box Motivate
1084 3246 Montague St & Clinton St
1088 3250 NYCBS Depot - PIT
1092 3253 Bike The Branches - Central Branch
1094 3254 Soissons Landing
1100 3257 Adventures NYC
1110 3264 E 61 St & Park Ave
1112 3265 E 2 St & 2 Ave
1114 3266 Kiosk in a box Deployment
1138 3291 Riverside Dr & W 91 St
1311 3385 2 Ave & E 105 St
1400 3432 Bike in Movie Night | Prospect Park Bandshell

90 rows × 2 columns


In [29]:
defunct_trips_start = df.merge(defunct_stations, left_on=['start_station_id', 'start_station_name'],
         right_on=['id_x', 'name'])
defunct_trips_end = df.merge(defunct_stations, left_on=['end_station_id', 'end_station_name'],
         right_on=['id_x', 'name'])
defunct_trips_all = (defunct_trips_start.append(defunct_trips_end).compute().drop_duplicates())
defunct_trips_all


Out[29]:
trip_duration start_time stop_time start_station_id start_station_name start_station_latitude start_station_longitude end_station_id end_station_name end_station_latitude end_station_longitude bike_id user_type birth_year gender id_x name
0 178 2013-07-01 00:01:04 2013-07-01 00:04:02 293 Lafayette St & E 8 St 40.730286 -73.990768 237 E 11 St & 2 Ave 40.730473 -73.986725 14548 Subscriber 1980.0 2 293 Lafayette St & E 8 St
1 550 2013-07-01 00:01:59 2013-07-01 00:11:09 293 Lafayette St & E 8 St 40.730286 -73.990768 394 E 9 St & Avenue C 40.725212 -73.977684 16746 Customer NaN 0 293 Lafayette St & E 8 St
2 615 2013-07-01 05:48:21 2013-07-01 05:58:36 293 Lafayette St & E 8 St 40.730286 -73.990768 507 E 25 St & 2 Ave 40.739124 -73.979736 19080 Subscriber 1984.0 2 293 Lafayette St & E 8 St
3 307 2013-07-01 06:47:47 2013-07-01 06:52:54 293 Lafayette St & E 8 St 40.730286 -73.990768 445 E 10 St & Avenue A 40.727409 -73.981422 16105 Subscriber 1973.0 1 293 Lafayette St & E 8 St
4 387 2013-07-01 07:11:46 2013-07-01 07:18:13 293 Lafayette St & E 8 St 40.730286 -73.990768 497 E 17 St & Broadway 40.737049 -73.990089 15952 Subscriber 1954.0 1 293 Lafayette St & E 8 St
5 481 2013-07-01 07:13:30 2013-07-01 07:21:31 293 Lafayette St & E 8 St 40.730286 -73.990768 469 Broadway & W 53 St 40.763439 -73.982681 16108 Subscriber 1968.0 1 293 Lafayette St & E 8 St
6 915 2013-07-01 07:42:55 2013-07-01 07:58:10 293 Lafayette St & E 8 St 40.730286 -73.990768 456 E 53 St & Madison Ave 40.759712 -73.974022 18485 Subscriber 1983.0 1 293 Lafayette St & E 8 St
7 332 2013-07-01 07:53:56 2013-07-01 07:59:28 293 Lafayette St & E 8 St 40.730286 -73.990768 380 W 4 St & 7 Ave S 40.734013 -74.002937 19540 Subscriber 1983.0 1 293 Lafayette St & E 8 St
8 687 2013-07-01 07:53:59 2013-07-01 08:05:26 293 Lafayette St & E 8 St 40.730286 -73.990768 519 Pershing Square N 40.751884 -73.977699 16270 Subscriber 1981.0 1 293 Lafayette St & E 8 St
9 472 2013-07-01 07:58:52 2013-07-01 08:06:44 293 Lafayette St & E 8 St 40.730286 -73.990768 383 Greenwich Ave & Charles St 40.735237 -74.000275 15830 Subscriber 1989.0 2 293 Lafayette St & E 8 St
10 201 2013-07-01 08:03:14 2013-07-01 08:06:35 293 Lafayette St & E 8 St 40.730286 -73.990768 445 E 10 St & Avenue A 40.727409 -73.981422 17172 Subscriber 1983.0 1 293 Lafayette St & E 8 St
11 1473 2013-07-01 08:07:14 2013-07-01 08:31:47 293 Lafayette St & E 8 St 40.730286 -73.990768 484 W 44 St & 5 Ave 40.755001 -73.980141 18883 Subscriber 1979.0 1 293 Lafayette St & E 8 St
12 1368 2013-07-01 08:09:13 2013-07-01 08:32:01 293 Lafayette St & E 8 St 40.730286 -73.990768 522 E 51 St & Lexington Ave 40.757149 -73.972076 16236 Subscriber 1961.0 2 293 Lafayette St & E 8 St
13 507 2013-07-01 08:20:27 2013-07-01 08:28:54 293 Lafayette St & E 8 St 40.730286 -73.990768 151 Cleveland Pl & Spring St 40.721817 -73.997200 17838 Subscriber 1985.0 2 293 Lafayette St & E 8 St
14 470 2013-07-01 08:22:13 2013-07-01 08:30:03 293 Lafayette St & E 8 St 40.730286 -73.990768 546 E 30 St & Park Ave S 40.744450 -73.983032 18607 Subscriber 1985.0 1 293 Lafayette St & E 8 St
15 673 2013-07-01 08:23:45 2013-07-01 08:34:58 293 Lafayette St & E 8 St 40.730286 -73.990768 546 E 30 St & Park Ave S 40.744450 -73.983032 18849 Subscriber 1981.0 2 293 Lafayette St & E 8 St
16 460 2013-07-01 08:25:19 2013-07-01 08:32:59 293 Lafayette St & E 8 St 40.730286 -73.990768 377 6 Ave & Canal St 40.722439 -74.005661 17104 Subscriber 1970.0 1 293 Lafayette St & E 8 St
17 558 2013-07-01 08:30:25 2013-07-01 08:39:43 293 Lafayette St & E 8 St 40.730286 -73.990768 320 Church St & Leonard St 40.717442 -74.005836 18092 Subscriber 1976.0 1 293 Lafayette St & E 8 St
18 458 2013-07-01 08:37:16 2013-07-01 08:44:54 293 Lafayette St & E 8 St 40.730286 -73.990768 445 E 10 St & Avenue A 40.727409 -73.981422 19981 Customer NaN 0 293 Lafayette St & E 8 St
19 1119 2013-07-01 08:44:37 2013-07-01 09:03:16 293 Lafayette St & E 8 St 40.730286 -73.990768 330 Reade St & Broadway 40.714504 -74.005630 17955 Subscriber 1973.0 1 293 Lafayette St & E 8 St
20 453 2013-07-01 08:50:18 2013-07-01 08:57:51 293 Lafayette St & E 8 St 40.730286 -73.990768 323 Lawrence St & Willoughby St 40.692360 -73.986320 15215 Subscriber 1974.0 2 293 Lafayette St & E 8 St
21 453 2013-07-01 08:50:39 2013-07-01 08:58:12 293 Lafayette St & E 8 St 40.730286 -73.990768 151 Cleveland Pl & Spring St 40.721817 -73.997200 18613 Subscriber 1953.0 2 293 Lafayette St & E 8 St
22 1055 2013-07-01 09:02:12 2013-07-01 09:19:47 293 Lafayette St & E 8 St 40.730286 -73.990768 360 William St & Pine St 40.707180 -74.008873 19020 Subscriber 1986.0 1 293 Lafayette St & E 8 St
23 235 2013-07-01 09:03:56 2013-07-01 09:07:51 293 Lafayette St & E 8 St 40.730286 -73.990768 445 E 10 St & Avenue A 40.727409 -73.981422 16587 Subscriber 1981.0 1 293 Lafayette St & E 8 St
24 813 2013-07-01 09:06:01 2013-07-01 09:19:34 293 Lafayette St & E 8 St 40.730286 -73.990768 79 Franklin St & W Broadway 40.719116 -74.006668 14943 Customer NaN 0 293 Lafayette St & E 8 St
25 488 2013-07-01 09:16:12 2013-07-01 09:24:20 293 Lafayette St & E 8 St 40.730286 -73.990768 410 Suffolk St & Stanton St 40.720665 -73.985176 16821 Subscriber 1983.0 2 293 Lafayette St & E 8 St
26 663 2013-07-01 09:20:18 2013-07-01 09:31:21 293 Lafayette St & E 8 St 40.730286 -73.990768 419 Carlton Ave & Park Ave 40.695808 -73.973557 20289 Subscriber 1969.0 1 293 Lafayette St & E 8 St
27 582 2013-07-01 09:27:02 2013-07-01 09:36:44 293 Lafayette St & E 8 St 40.730286 -73.990768 377 6 Ave & Canal St 40.722439 -74.005661 15740 Subscriber 1954.0 2 293 Lafayette St & E 8 St
28 369 2013-07-01 09:34:34 2013-07-01 09:40:43 293 Lafayette St & E 8 St 40.730286 -73.990768 419 Carlton Ave & Park Ave 40.695808 -73.973557 17653 Subscriber 1985.0 2 293 Lafayette St & E 8 St
29 1443 2013-07-01 09:37:06 2013-07-01 10:01:09 293 Lafayette St & E 8 St 40.730286 -73.990768 363 West Thames St 40.708347 -74.017136 17753 Subscriber 1963.0 2 293 Lafayette St & E 8 St
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
819 849 2016-12-30 17:23:53 2016-12-30 17:38:03 2021 W 45 St & 8 Ave 40.759293 -73.988594 2017 E 43 St & 2 Ave 40.750225 -73.971214 21425 Subscriber 1971.0 1 2017 E 43 St & 2 Ave
820 176 2016-12-30 18:31:55 2016-12-30 18:34:51 516 E 47 St & 1 Ave 40.752068 -73.967842 2017 E 43 St & 2 Ave 40.750225 -73.971214 18189 Subscriber 1947.0 1 2017 E 43 St & 2 Ave
821 542 2016-12-30 18:53:13 2016-12-30 19:02:16 3134 3 Ave & E 62 St 40.763126 -73.965271 2017 E 43 St & 2 Ave 40.750225 -73.971214 26070 Subscriber 1984.0 1 2017 E 43 St & 2 Ave
822 644 2016-12-30 20:12:49 2016-12-30 20:23:33 507 E 25 St & 2 Ave 40.739124 -73.979736 2017 E 43 St & 2 Ave 40.750225 -73.971214 18769 Subscriber 1979.0 1 2017 E 43 St & 2 Ave
823 447 2016-12-30 20:15:16 2016-12-30 20:22:43 520 W 52 St & 5 Ave 40.759922 -73.976486 2017 E 43 St & 2 Ave 40.750225 -73.971214 15298 Subscriber 1993.0 1 2017 E 43 St & 2 Ave
824 631 2016-12-30 20:36:26 2016-12-30 20:46:58 3264 E 61 St & Park Ave 40.764210 -73.969101 2017 E 43 St & 2 Ave 40.750225 -73.971214 25940 Subscriber 1975.0 1 2017 E 43 St & 2 Ave
825 555 2016-12-30 20:38:28 2016-12-30 20:47:43 520 W 52 St & 5 Ave 40.759922 -73.976486 2017 E 43 St & 2 Ave 40.750225 -73.971214 26498 Subscriber 1981.0 2 2017 E 43 St & 2 Ave
826 599 2016-12-30 20:52:42 2016-12-30 21:02:41 476 E 31 St & 3 Ave 40.743942 -73.979660 2017 E 43 St & 2 Ave 40.750225 -73.971214 19728 Subscriber 1956.0 1 2017 E 43 St & 2 Ave
827 675 2016-12-30 20:52:45 2016-12-30 21:04:01 476 E 31 St & 3 Ave 40.743942 -73.979660 2017 E 43 St & 2 Ave 40.750225 -73.971214 23888 Subscriber 1956.0 2 2017 E 43 St & 2 Ave
828 658 2016-12-30 23:59:36 2016-12-31 00:10:35 497 E 17 St & Broadway 40.737049 -73.990089 2017 E 43 St & 2 Ave 40.750225 -73.971214 22829 Subscriber 1988.0 1 2017 E 43 St & 2 Ave
829 1301 2016-12-31 07:07:37 2016-12-31 07:29:18 422 W 59 St & 10 Ave 40.770512 -73.988037 2017 E 43 St & 2 Ave 40.750225 -73.971214 21634 Subscriber 1988.0 1 2017 E 43 St & 2 Ave
830 605 2016-12-31 09:59:15 2016-12-31 10:09:21 537 Lexington Ave & E 24 St 40.740257 -73.984093 2017 E 43 St & 2 Ave 40.750225 -73.971214 14816 Subscriber 1986.0 1 2017 E 43 St & 2 Ave
831 356 2016-12-31 18:31:10 2016-12-31 18:37:07 305 E 58 St & 3 Ave 40.760956 -73.967247 2017 E 43 St & 2 Ave 40.750225 -73.971214 16863 Subscriber 1986.0 1 2017 E 43 St & 2 Ave
832 320 2016-12-31 18:49:12 2016-12-31 18:54:33 454 E 51 St & 1 Ave 40.754559 -73.965927 2017 E 43 St & 2 Ave 40.750225 -73.971214 25544 Subscriber 1948.0 1 2017 E 43 St & 2 Ave
833 292 2016-12-29 10:46:47 2016-12-29 10:51:40 432 E 7 St & Avenue A 40.726219 -73.983795 3219 NYCBS Depot - STY 40.729195 -73.976654 17981 Subscriber 1991.0 1 3219 NYCBS Depot - STY
834 843 2016-12-29 18:48:21 2016-12-29 19:02:25 2010 Grand St & Greene St 40.721653 -74.002350 3219 NYCBS Depot - STY 40.729195 -73.976654 19636 Subscriber 1986.0 1 3219 NYCBS Depot - STY
835 489 2016-12-30 11:14:24 2016-12-30 11:22:33 475 E 16 St & Irving Pl 40.735241 -73.987587 3219 NYCBS Depot - STY 40.729195 -73.976654 25244 Subscriber 1961.0 1 3219 NYCBS Depot - STY
836 425 2016-12-30 14:47:24 2016-12-30 14:54:30 285 Broadway & E 14 St 40.734547 -73.990738 3219 NYCBS Depot - STY 40.729195 -73.976654 25347 Subscriber 1992.0 1 3219 NYCBS Depot - STY
837 10118 2016-12-30 17:34:29 2016-12-30 20:23:07 307 Canal St & Rutgers St 40.714275 -73.989899 3219 NYCBS Depot - STY 40.729195 -73.976654 19385 Customer NaN 0 3219 NYCBS Depot - STY
838 364 2016-12-30 18:41:52 2016-12-30 18:47:57 504 1 Ave & E 16 St 40.732220 -73.981659 3219 NYCBS Depot - STY 40.729195 -73.976654 21322 Subscriber 1992.0 1 3219 NYCBS Depot - STY
846 774 2016-12-29 16:04:17 2016-12-29 16:17:12 500 Broadway & W 51 St 40.762287 -73.983360 3017 NYCBS Depot - FAR 40.751484 -73.996765 25002 Subscriber 1988.0 1 3017 NYCBS Depot - FAR
847 1767 2016-12-29 19:31:28 2016-12-29 20:00:55 3345 Madison Ave & E 99 St 40.789486 -73.952431 3237 21 St & 41 Ave 40.753834 -73.942680 26371 Subscriber 1963.0 1 3237 21 St & 41 Ave
848 896 2016-12-30 13:26:43 2016-12-30 13:41:39 3113 Greenpoint Ave & Manhattan Ave 40.730259 -73.953941 3237 21 St & 41 Ave 40.753834 -73.942680 25186 Subscriber 1978.0 1 3237 21 St & 41 Ave
849 253 2016-12-30 13:44:19 2016-12-30 13:48:32 3126 44 Dr & Jackson Ave 40.747181 -73.943260 3237 21 St & 41 Ave 40.753834 -73.942680 26703 Subscriber 1947.0 1 3237 21 St & 41 Ave
850 1754 2016-12-30 14:37:57 2016-12-30 15:07:11 402 Broadway & E 22 St 40.740345 -73.989548 3237 21 St & 41 Ave 40.753834 -73.942680 25947 Subscriber 1963.0 1 3237 21 St & 41 Ave
851 363 2016-12-31 01:29:36 2016-12-31 01:35:39 3121 Jackson Ave & 46 Rd 40.745247 -73.947334 3237 21 St & 41 Ave 40.753834 -73.942680 16471 Subscriber 1959.0 2 3237 21 St & 41 Ave
852 1580 2016-12-31 11:26:50 2016-12-31 11:53:11 3119 Vernon Blvd & 50 Ave 40.742329 -73.954117 3237 21 St & 41 Ave 40.753834 -73.942680 26738 Subscriber 1956.0 2 3237 21 St & 41 Ave
853 2590 2016-12-31 13:21:52 2016-12-31 14:05:02 261 Johnson St & Gold St 40.694748 -73.983627 3237 21 St & 41 Ave 40.753834 -73.942680 20954 Subscriber 1982.0 1 3237 21 St & 41 Ave
854 329 2016-12-31 13:27:26 2016-12-31 13:32:55 3126 44 Dr & Jackson Ave 40.747181 -73.943260 3237 21 St & 41 Ave 40.753834 -73.942680 25327 Subscriber 1996.0 2 3237 21 St & 41 Ave
855 1637 2016-12-30 08:38:12 2016-12-30 09:05:30 3319 14 St & 5 Ave 40.666286 -73.988953 255 NYCBS Depot - SSP 40.646767 -74.016510 24953 Subscriber 1988.0 1 255 NYCBS Depot - SSP

5521461 rows × 17 columns

^^

Over five million rows that start or end at defunct stations!

We can't drop that many given the total is 36 million!


In [56]:
defunct_start_trips_count = defunct_trips_all[['start_station_id', 'start_station_name', 'trip_duration']].groupby(['start_station_id', 'start_station_name']).count().sort_values('trip_duration', 
                                                                                               ascending=False)
bad_ids = defunct_start_trips_count[defunct_start_trips_count.trip_duration < 50].reset_index()[['start_station_id']]
defunct_start_trips_count[defunct_start_trips_count.trip_duration < 50]


Out[56]:
trip_duration
start_station_id start_station_name
3423 West Drive & Prospect Park West 49
3329 Degraw St & Smith St 48
3414 Bergen St & Flatbush Ave 47
3339 Berkeley Pl & 6 Ave 45
3438 E 76 St & 3 Ave 45
3363 E 102 St & Park Ave 43
3404 7 St & 5 Ave 43
3419 Douglass St & 4 Ave 42
3355 E 66 St & Madison Ave 42
3328 W 100 St & Manhattan Ave 40
3316 W 104 St & Amsterdam Ave 39
3366 West End Ave & W 107 St 39
3399 7 St & 3 Ave 38
3347 Van Brunt St & Wolcott St 38
3322 12 St & 4 Ave 38
3343 W 107 St & Columbus Ave 37
3354 3 St & Prospect Park West 36
3356 Amsterdam Ave & W 66 St 36
3310 14 St & 7 Ave 36
3130 21 St & Queens Plaza North 36
3319 14 St & 5 Ave 36
3317 10 St & 5 Ave 35
3358 Garfield Pl & 8 Ave 34
3390 E 109 St & 3 Ave 34
3365 3 St & 7 Ave 33
3346 Berkeley Pl & 7 Ave 31
3384 Smith St & 3 St 31
3250 NYCBS Depot - PIT 31
3413 Wyckoff St & 3 Ave 30
3351 E 102 St & 1 Ave 30
... ... ...
3300 Prospect Park West & 8 St 19
3370 78 St & 2 Ave 18
3340 Wolcott St & Dwight St 18
3239 Bressler 18
3327 3 Ave & E 100 St 17
3014 3969.TEMP (Bike The Branches - Central Branch) 16
3338 2 Ave & E 99 St 15
3398 Smith St & 9 St 14
3391 E 106 St & 1 Ave 14
3348 Coffey St & Conover St 14
3017 NYCBS Depot - FAR 13
3040 SSP Tech Workshop 12
3440 Fulton St & Adams St 12
3387 E 106 St & Madison Ave 12
3036 8D OPS 01 10
3342 Pioneer St & Richards St 10
3379 E 103 St & Lexington Ave 9
3405 5 St & 6 Ave 9
3266 Kiosk in a box Deployment 7
3393 Richards St & Delavan St 7
3333 Columbia St & Lorraine St 7
3395 Henry St & W 9 St 6
3424 E 106 St & Lexington Ave 6
3394 Columbia St & W 9 St 4
3330 Henry St & Bay St 3
3326 Clinton St & Centre St 3
3252 Bike The Branches - Red Hook Branch 2
3371 4 Ave & 2 St 1
3385 2 Ave & E 105 St 1
3240 NYCBS Depot BAL - DYR 1

77 rows × 1 columns


In [57]:
defunct_end_trips_count = defunct_trips_all[[
    'end_station_id', 'end_station_name', 'trip_duration']].groupby(
    ['end_station_id', 'end_station_name']).count().sort_values('trip_duration', ascending=False)
bad_ids_2 = (
    defunct_end_trips_count[defunct_end_trips_count.trip_duration < 50].reset_index()
)[['end_station_id']]
defunct_end_trips_count[defunct_end_trips_count.trip_duration < 50]


Out[57]:
trip_duration
end_station_id end_station_name
3423 West Drive & Prospect Park West 49
3341 Central Park West & W 102 St 48
3414 Bergen St & Flatbush Ave 47
3283 W 89 St & Columbus Ave 44
3404 7 St & 5 Ave 43
3344 Pioneer St & Van Brunt St 39
3353 Reed St & Van Brunt St 37
3413 Wyckoff St & 3 Ave 37
3384 Smith St & 3 St 36
3407 Union St & Nevins St 36
3347 Van Brunt St & Wolcott St 35
3322 12 St & 4 Ave 35
3356 Amsterdam Ave & W 66 St 34
3351 E 102 St & 1 Ave 34
3317 10 St & 5 Ave 33
3304 6 Ave & 9 St 32
3438 E 76 St & 3 Ave 31
3320 Central Park West & W 100 St 30
3392 Commerce St & Van Brunt St 30
3319 14 St & 5 Ave 30
3310 14 St & 7 Ave 30
3345 Madison Ave & E 99 St 30
3403 4 Ave & 9 St 27
3316 W 104 St & Amsterdam Ave 27
3363 E 102 St & Park Ave 26
3425 2 Ave & E 104 St 25
3166 Riverside Dr & W 72 St 24
3434 W 88 St & West End Ave 23
3343 W 107 St & Columbus Ave 22
3309 E 97 St & 3 Ave 22
... ... ...
3354 3 St & Prospect Park West 15
3381 3 St & Hoyt St 15
3313 6 Ave & 12 St 15
3333 Columbia St & Lorraine St 14
3240 NYCBS Depot BAL - DYR 14
3401 2 Ave & 9 St 12
3326 Clinton St & Centre St 12
3358 Garfield Pl & 8 Ave 11
3393 Richards St & Delavan St 11
3036 8D OPS 01 10
3330 Henry St & Bay St 10
3327 3 Ave & E 100 St 10
3424 E 106 St & Lexington Ave 9
3040 SSP Tech Workshop 9
3405 5 St & 6 Ave 8
3346 Berkeley Pl & 7 Ave 7
3302 Columbus Ave & W 103 St 7
3252 Bike The Branches - Red Hook Branch 7
3379 E 103 St & Lexington Ave 7
3440 Fulton St & Adams St 7
3266 Kiosk in a box Deployment 6
3370 78 St & 2 Ave 6
3186 Grove St PATH 6
3338 2 Ave & E 99 St 5
3014 3969.TEMP (Bike The Branches - Central Branch) 3
3395 Henry St & W 9 St 3
3385 2 Ave & E 105 St 2
3394 Columbia St & W 9 St 1
3187 Warren St 1
3439 Broadway & E 22 St - Valet Scan 1

77 rows × 1 columns


In [68]:
bad_ids.columns =['id']
bad_ids_2.columns = ['id']
bad_ids = bad_ids.append(bad_ids_2).drop_duplicates().sort_values('id').reset_index(drop=True)
from IPython.display import display, HTML 
display(HTML(bad_ids.T.to_html()))


0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
id 3014 3017 3036 3040 3130 3166 3186 3187 3239 3240 3250 3252 3266 3283 3297 3300 3302 3304 3306 3309 3310 3313 3316 3317 3319 3320 3322 3326 3327 3328 3329 3330 3332 3333 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3351 3353 3354 3355 3356 3358 3363 3365 3366 3370 3371 3379 3381 3384 3385 3387 3390 3391 3392 3393 3394 3395 3398 3399 3401 3403 3404 3405 3407 3413 3414 3419 3421 3423 3424 3425 3432 3434 3438 3439 3440

In [73]:
bad_ids.id.T.values


Out[73]:
array([3014, 3017, 3036, 3040, 3130, 3166, 3186, 3187, 3239, 3240, 3250,
       3252, 3266, 3283, 3297, 3300, 3302, 3304, 3306, 3309, 3310, 3313,
       3316, 3317, 3319, 3320, 3322, 3326, 3327, 3328, 3329, 3330, 3332,
       3333, 3338, 3339, 3340, 3341, 3342, 3343, 3344, 3345, 3346, 3347,
       3348, 3351, 3353, 3354, 3355, 3356, 3358, 3363, 3365, 3366, 3370,
       3371, 3379, 3381, 3384, 3385, 3387, 3390, 3391, 3392, 3393, 3394,
       3395, 3398, 3399, 3401, 3403, 3404, 3405, 3407, 3413, 3414, 3419,
       3421, 3423, 3424, 3425, 3432, 3434, 3438, 3439, 3440])

In [76]:
df2 = df.merge(bad_ids, left_on='start_station_id', right_on='id', how='left').merge(
    bad_ids, left_on='end_station_id', right_on='id', how='left'
)

In [82]:
df3 = df2[df2.id_x.isnull() & df2.id_y.isnull()]

In [83]:
df3.start_station_id.compute().shape


Out[83]:
(36589081,)

In [81]:
df.start_station_id.compute().shape


Out[81]:
(36902025,)

In [ ]: