In [1]:
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

%matplotlib inline

In [2]:
# Location of the trip export and the columns that hold timestamps; named so
# the notebook can be repointed at another file by editing one line.
CITIBIKE_CSV = '~/Downloads/sept2016-citibike.csv'
TIMESTAMP_COLUMNS = ['Start Time', 'Stop Time']

# Load every trip, parsing the start/stop columns as datetimes.
df = pd.read_csv(CITIBIKE_CSV, parse_dates=TIMESTAMP_COLUMNS)

In [3]:
# Peek at the first trip record to see which columns the file provides.
df.head(n=1)


Out[3]:
Trip Duration Start Time Stop Time Start Station ID Start Station Name Start Station Latitude Start Station Longitude End Station ID End Station Name End Station Latitude End Station Longitude Bike ID User Type Birth Year Gender
0 254 2016-09-01 00:06:09 2016-09-01 00:10:24 3273 Manila & 1st 40.721651 -74.042884 3203 Hamilton Park 40.727596 -74.044247 26157 Subscriber 1989.0 2

In [4]:
# Show the dtype pandas assigned to each column (checks that the two
# parse_dates columns were read as datetimes).
df.dtypes


Out[4]:
Trip Duration                       int64
Start Time                 datetime64[ns]
Stop Time                  datetime64[ns]
Start Station ID                    int64
Start Station Name                 object
Start Station Latitude            float64
Start Station Longitude           float64
End Station ID                      int64
End Station Name                   object
End Station Latitude              float64
End Station Longitude             float64
Bike ID                             int64
User Type                          object
Birth Year                        float64
Gender                              int64
dtype: object

In [5]:
# Show the first two trips.
df.head(n=2)


Out[5]:
Trip Duration Start Time Stop Time Start Station ID Start Station Name Start Station Latitude Start Station Longitude End Station ID End Station Name End Station Latitude End Station Longitude Bike ID User Type Birth Year Gender
0 254 2016-09-01 00:06:09 2016-09-01 00:10:24 3273 Manila & 1st 40.721651 -74.042884 3203 Hamilton Park 40.727596 -74.044247 26157 Subscriber 1989.0 2
1 119 2016-09-01 00:08:04 2016-09-01 00:10:04 3272 Jersey & 3rd 40.723332 -74.045953 3278 Monmouth and 6th 40.725685 -74.048790 24493 Subscriber 1988.0 1

In [6]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()


Out[6]:
Trip Duration Start Station ID Start Station Latitude Start Station Longitude End Station ID End Station Latitude End Station Longitude Bike ID Birth Year Gender
count 3.342500e+04 33425.000000 33425.000000 33425.000000 33425.000000 33425.000000 33425.000000 33425.000000 30710.000000 33425.000000
mean 8.539890e+02 3213.032251 40.722550 -74.045938 3209.768048 40.722096 -74.045393 25281.835602 1979.529599 1.130681
std 1.181797e+04 32.699684 0.007972 0.010788 49.116231 0.007715 0.010854 872.598394 9.543978 0.532882
min 6.100000e+01 3183.000000 40.692640 -74.096937 347.000000 40.692640 -74.096937 16644.000000 1937.000000 0.000000
25% 2.470000e+02 3186.000000 40.716247 -74.049968 3186.000000 40.716247 -74.049638 24529.000000 1974.000000 1.000000
50% 3.870000e+02 3202.000000 40.721525 -74.043845 3200.000000 40.719586 -74.043117 24690.000000 1982.000000 1.000000
75% 6.650000e+02 3214.000000 40.727224 -74.038051 3213.000000 40.727224 -74.038051 26219.000000 1986.000000 1.000000
max 1.532001e+06 3281.000000 40.752559 -74.032108 3281.000000 40.760875 -73.957390 26710.000000 1999.000000 2.000000

In [7]:
# Show the last four trips in the file.
df.tail(n=4)


Out[7]:
Trip Duration Start Time Stop Time Start Station ID Start Station Name Start Station Latitude Start Station Longitude End Station ID End Station Name End Station Latitude End Station Longitude Bike ID User Type Birth Year Gender
33421 207 2016-09-30 23:26:20 2016-09-30 23:29:48 3195 Sip Ave 40.730743 -74.063784 3194 McGinley Square 40.725340 -74.067622 26170 Subscriber 1980.0 1
33422 204 2016-09-30 23:40:56 2016-09-30 23:44:20 3276 Marin Light Rail 40.714584 -74.042817 3213 Van Vorst Park 40.718489 -74.047727 24699 Subscriber 1986.0 1
33423 218 2016-09-30 23:51:17 2016-09-30 23:54:55 3195 Sip Ave 40.730743 -74.063784 3225 Baldwin at Montgomery 40.723659 -74.064194 24587 Subscriber 1978.0 2
33424 232 2016-09-30 23:51:21 2016-09-30 23:55:14 3202 Newport PATH 40.727224 -74.033759 3275 Columbus Drive 40.718355 -74.038914 24464 Subscriber 1990.0 1

In [8]:
# Build the station lookup from (ID, name) pairs taken from the same rows.
# Calling .unique() on the ID column and on the name column independently only
# lines up when IDs and names are strictly one-to-one: a station that appears
# under two names, or two stations sharing a name, would mispair rows or fail
# with a length mismatch.
station_names = (
    df[['Start Station ID', 'Start Station Name']]
    .drop_duplicates(subset='Start Station ID')  # first-seen name for each ID
    .set_index('Start Station ID')['Start Station Name']
)

# Same layout as before: unnamed index of station IDs (first-appearance order)
# and a single name column labelled 0.
df_stations = pd.DataFrame(data=station_names.values, index=station_names.index.values)

In [9]:
# Preview the first row of the station table.
df_stations.head(n=1)


Out[9]:
0
3273 Manila & 1st

In [10]:
# Number of trips that started at each station.
# reindex(..., fill_value=0) lines the counts up with the station table and
# records 0 for any station ID absent from the counts, where plain label
# indexing would raise KeyError (or yield NaN on older pandas).
df_stations['Check-outs'] = (
    df['Start Station ID']
    .value_counts()
    .reindex(df_stations.index, fill_value=0)
)

In [11]:
# Number of trips that ended at each station in the table.
# The station table is built from *start* stations, so a station where no trip
# ended is missing from these counts; reindex(..., fill_value=0) records it as
# 0 instead of raising KeyError (or yielding NaN on older pandas). End stations
# that are not in the table are dropped by the reindex.
df_stations['Check-ins'] = (
    df['End Station ID']
    .value_counts()
    .reindex(df_stations.index, fill_value=0)
)

In [12]:
# Net flow per station: arrivals minus departures, so a positive value means
# more trips ended at the station than started there.
df_stations['Net'] = df_stations['Check-ins'].sub(df_stations['Check-outs'])

In [13]:
# Total net flow across the listed stations. Every trip starts at a listed
# station, so a negative total counts trips that ended somewhere not in the table.
df_stations['Net'].sum()


Out[13]:
-7

In [14]:
# Display the full per-station table: name (column 0), check-outs, check-ins and net flow.
df_stations


Out[14]:
0 Check-outs Check-ins Net
3273 Manila & 1st 435 289 -146
3272 Jersey & 3rd 747 641 -106
3206 Hilltop 357 206 -151
3220 5 Corners Library 352 291 -61
3203 Hamilton Park 1910 2031 121
3211 Newark Ave 1012 993 -19
3225 Baldwin at Montgomery 522 398 -124
3183 Exchange Place 2541 3071 530
3276 Marin Light Rail 1144 1156 12
3194 McGinley Square 933 629 -304
3185 City Hall 955 893 -62
3214 Essex Light Rail 928 1042 114
3209 Brunswick St 925 905 -20
3267 Morris Canal 1456 1416 -40
3207 Oakland Ave 475 398 -77
3193 Lincoln Park 412 495 83
3278 Monmouth and 6th 823 706 -117
3210 Pershing Field 309 296 -13
3199 Newport Pkwy 1068 1018 -50
3184 Paulus Hook 1063 953 -110
3213 Van Vorst Park 942 832 -110
3279 Dixon Mills 564 452 -112
3186 Grove St PATH 3909 5157 1248
3195 Sip Ave 1924 1858 -66
3196 Riverview Park 188 148 -40
3215 Central Ave 222 186 -36
3192 Liberty Light Rail 805 791 -14
3270 Jersey & 6th St 867 606 -261
3275 Columbus Drive 690 596 -94
3187 Warren St 1141 1044 -97
3202 Newport PATH 1617 1775 158
3268 Lafayette Park 178 199 21
3201 Dey St 256 275 19
3212 Christ Hospital 281 222 -59
3191 Union St 27 62 35
3281 Leonard Gordon Park 139 110 -29
3205 JC Medical Center 569 577 8
3197 North St 193 160 -33
3190 Garfield Ave Station 107 93 -14
3198 Heights Elevator 235 209 -26
3188 NJCU 63 70 7
3271 Danforth Light Rail 4 1 -3
3189 West Side Light Rail 21 50 29
3216 Columbia Park 31 31 0
3217 Bayside Park 48 46 -2
3200 MLK Light Rail 22 26 4
3274 Bethune Center 15 15 0

In [15]:
# One coordinate row per start station.
# De-duplicate on the station ID alone: de-duplicating on ID + latitude +
# longitude keeps several rows for a station whose recorded coordinates vary
# between trips, which would then duplicate that station when the coordinates
# are joined onto the station table.
df_coor = (
    df[['Start Station ID', 'Start Station Latitude', 'Start Station Longitude']]
    .drop_duplicates(subset='Start Station ID')
)

In [16]:
# Index the coordinates by station ID so they can be joined onto df_stations.
# The guard makes the cell safe to re-run: once the ID column has become the
# index, calling set_index again would raise KeyError.
if 'Start Station ID' in df_coor.columns:
    df_coor = df_coor.set_index('Start Station ID')

In [17]:
# Attach each station's latitude/longitude, matching on the station-ID index
# and keeping every row of the station table.
df_stations = df_stations.join(other=df_coor, how='left')

In [18]:
# Inspect the column labels after the join (the name column is still labelled 0).
df_stations.columns


Out[18]:
Index([                        0,              'Check-outs',
                     'Check-ins',                     'Net',
        'Start Station Latitude', 'Start Station Longitude'],
      dtype='object')

In [19]:
# Give the columns readable names.
# Renaming by label (rather than assigning a positional list) cannot mislabel
# a column if the column order changes, and stays a no-op when the cell is
# re-run after further columns have been added.
df_stations = df_stations.rename(columns={
    0: 'Location',
    'Start Station Latitude': 'Latitude',
    'Start Station Longitude': 'Longitude',
})

In [20]:
# Display the station table with its renamed columns and joined coordinates.
df_stations


Out[20]:
Location Check-outs Check-ins Net Latitude Longitude
3273 Manila & 1st 435 289 -146 40.721651 -74.042884
3272 Jersey & 3rd 747 641 -106 40.723332 -74.045953
3206 Hilltop 357 206 -151 40.731169 -74.057574
3220 5 Corners Library 352 291 -61 40.734961 -74.059503
3203 Hamilton Park 1910 2031 121 40.727596 -74.044247
3211 Newark Ave 1012 993 -19 40.721525 -74.046305
3225 Baldwin at Montgomery 522 398 -124 40.723659 -74.064194
3183 Exchange Place 2541 3071 530 40.716247 -74.033459
3276 Marin Light Rail 1144 1156 12 40.714584 -74.042817
3194 McGinley Square 933 629 -304 40.725340 -74.067622
3185 City Hall 955 893 -62 40.717733 -74.043845
3214 Essex Light Rail 928 1042 114 40.712774 -74.036486
3209 Brunswick St 925 905 -20 40.724176 -74.050656
3267 Morris Canal 1456 1416 -40 40.712419 -74.038526
3207 Oakland Ave 475 398 -77 40.737604 -74.052478
3193 Lincoln Park 412 495 83 40.724605 -74.078406
3278 Monmouth and 6th 823 706 -117 40.725685 -74.048790
3210 Pershing Field 309 296 -13 40.742677 -74.051789
3199 Newport Pkwy 1068 1018 -50 40.728745 -74.032108
3184 Paulus Hook 1063 953 -110 40.714145 -74.033552
3213 Van Vorst Park 942 832 -110 40.718489 -74.047727
3279 Dixon Mills 564 452 -112 40.721630 -74.049968
3186 Grove St PATH 3909 5157 1248 40.719586 -74.043117
3195 Sip Ave 1924 1858 -66 40.730743 -74.063784
3196 Riverview Park 188 148 -40 40.744319 -74.043991
3215 Central Ave 222 186 -36 40.746730 -74.049251
3192 Liberty Light Rail 805 791 -14 40.711242 -74.055701
3270 Jersey & 6th St 867 606 -261 40.725289 -74.045572
3275 Columbus Drive 690 596 -94 40.718355 -74.038914
3187 Warren St 1141 1044 -97 40.721124 -74.038051
3202 Newport PATH 1617 1775 158 40.727224 -74.033759
3268 Lafayette Park 178 199 21 40.713464 -74.062859
3201 Dey St 256 275 19 40.737711 -74.066921
3212 Christ Hospital 281 222 -59 40.734786 -74.050444
3191 Union St 27 62 35 40.718211 -74.083639
3281 Leonard Gordon Park 139 110 -29 40.745910 -74.057271
3205 JC Medical Center 569 577 8 40.716540 -74.049638
3197 North St 193 160 -33 40.752559 -74.044725
3190 Garfield Ave Station 107 93 -14 40.710467 -74.070039
3198 Heights Elevator 235 209 -26 40.748716 -74.040443
3188 NJCU 63 70 7 40.710109 -74.085849
3271 Danforth Light Rail 4 1 -3 40.692640 -74.088012
3189 West Side Light Rail 21 50 29 40.714402 -74.088772
3216 Columbia Park 31 31 0 40.697030 -74.096937
3217 Bayside Park 48 46 -2 40.698651 -74.082080
3200 MLK Light Rail 22 26 4 40.711130 -74.078885
3274 Bethune Center 15 15 0 40.704958 -74.085931

In [21]:
import gmaps

In [22]:
# Read the Google Maps API key from the environment instead of committing it
# to the notebook. Set GOOGLE_MAPS_API_KEY before starting Jupyter.
# NOTE(review): the key that was previously hardcoded here has been exposed in
# the notebook's history and should be revoked/rotated.
api_key = os.environ.get('GOOGLE_MAPS_API_KEY') or None
if api_key is None:
    print('GOOGLE_MAPS_API_KEY is not set; the Google map below will not load correctly.')

In [23]:
# Pair each station's latitude and longitude into a single (lat, long) tuple column.
coordinate_pairs = [
    (lat, lng)
    for lat, lng in zip(df_stations['Latitude'], df_stations['Longitude'])
]
df_stations['lat-long'] = coordinate_pairs

In [24]:
# Check the new lat-long column on the first station.
df_stations.head(n=1)


Out[24]:
Location Check-outs Check-ins Net Latitude Longitude lat-long
3273 Manila & 1st 435 289 -146 40.721651 -74.042884 (40.7216507249, -74.0428841114)

In [25]:
# Hand the API key to gmaps before any map or layer is created.
gmaps.configure(api_key=api_key)

In [26]:
# The (lat, long) tuples, one per station, used as the heatmap's points.
locations = df_stations.loc[:, 'lat-long']

In [27]:
# Create the map widget that the heatmap layer is attached to.
# NOTE(review): newer jupyter-gmaps documentation builds maps with
# gmaps.figure() — confirm which API the installed version expects.
m = gmaps.Map()

In [28]:
# Build a heatmap layer from the station coordinates, then attach it to the map.
station_heatmap = gmaps.heatmap_layer(locations)
m.add_layer(station_heatmap)

In [29]:
# Render the map widget as the cell output.
m

In [ ]: