In [1]:
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
In [2]:
# Load the trip records; both trip timestamp columns are parsed as datetimes.
df = pd.read_csv(
    '~/Downloads/sept2016-citibike.csv',
    parse_dates=['Start Time', 'Stop Time'],
)
In [3]:
# Peek at the first trip record to see which columns the file provides.
df.head(n=1)
Out[3]:
In [4]:
# Show the dtype pandas assigned to each column of the loaded trips.
df.dtypes
Out[4]:
In [5]:
# Show the first two trip records.
df.head(n=2)
Out[5]:
In [6]:
# Summary statistics for the trip table's columns.
df.describe()
Out[6]:
In [7]:
# Show the last four trip records.
df.tail(n=4)
Out[7]:
In [8]:
# Build the station lookup (index: station ID, single column 0: station name).
# The ID and its name are taken from the same trip row -- the first row in which
# each ID appears. Calling .unique() on the two columns independently only lines
# up when every ID has exactly one name and no two IDs share a name; otherwise
# stations are mislabelled or the constructor raises a length mismatch.
station_names = (
    df.drop_duplicates(subset='Start Station ID')
    .set_index('Start Station ID')['Start Station Name']
)
# Pass raw arrays so the result keeps an unnamed index and a column labelled 0.
df_stations = pd.DataFrame(data=station_names.values, index=station_names.index.values)
In [9]:
# Check the first row of the station table.
df_stations.head(n=1)
Out[9]:
In [10]:
# Departures per station: count trips by start station ID, then pick the counts
# in the station table's index order.
df_stations['Check-outs'] = (
    df['Start Station ID']
    .value_counts()
    .loc[df_stations.index]
)
In [11]:
# Arrivals per station: count trips by end station ID and align to the station table.
# reindex (instead of a [...] label lookup) tolerates a station that had departures
# but no arrivals in this file: it records 0 rather than raising KeyError.
# Stations that occur only as a destination are not in df_stations' index and are
# therefore not represented here.
df_stations['Check-ins'] = (
    df['End Station ID']
    .value_counts()
    .reindex(df_stations.index, fill_value=0)
)
In [12]:
# Net flow per station: arrivals minus departures (positive means bikes accumulate).
df_stations['Net'] = df_stations['Check-ins'].sub(df_stations['Check-outs'])
In [13]:
# Sanity check: total check-ins minus total check-outs over the listed stations.
df_stations['Net'].sum()
Out[13]:
In [14]:
# Display the station table with its check-out, check-in and net columns.
df_stations
Out[14]:
In [15]:
# One coordinate row per station. De-duplicating on the station ID alone (not on
# the ID/latitude/longitude triple) keeps the IDs unique, so joining these
# coordinates onto df_stations cannot fan out into repeated station rows when a
# station was logged with more than one coordinate pair. The first pair seen wins.
df_coor = (
    df[['Start Station ID', 'Start Station Latitude', 'Start Station Longitude']]
    .drop_duplicates(subset='Start Station ID')
)
In [16]:
# Index the coordinates by station ID so they can be joined on the index.
df_coor = df_coor.set_index(keys='Start Station ID')
In [17]:
# Attach latitude/longitude to each station, matching on the station-ID index
# and keeping every row of the station table.
df_stations = df_stations.join(df_coor, how='left')
In [18]:
# List the column labels after the join.
df_stations.columns
Out[18]:
In [19]:
# Give the columns readable names. Renaming by label (rather than assigning a
# positional list) does not depend on column count or order, and is safe to
# re-run after more columns have been added to the table.
df_stations = df_stations.rename(columns={
    0: 'Location',
    'Start Station Latitude': 'Latitude',
    'Start Station Longitude': 'Longitude',
})
In [20]:
# Display the station table with the renamed columns.
df_stations
Out[20]:
In [21]:
import gmaps
In [22]:
# Read the Google Maps API key from the environment instead of embedding it in
# the notebook, so the credential is never saved or shared with the file.
# NOTE(review): a key was previously hardcoded on this line; it should be
# revoked/rotated since it has been stored in plain text.
api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the GOOGLE_MAPS_API_KEY environment variable before running this notebook.'
    )
In [23]:
# Pair each station's latitude and longitude into a (lat, lon) tuple.
df_stations['lat-long'] = [
    (lat, lon)
    for lat, lon in zip(df_stations['Latitude'], df_stations['Longitude'])
]
In [24]:
# Confirm the new lat-long column on the first station row.
df_stations.head(n=1)
Out[24]:
In [25]:
# Hand the Google Maps API key to gmaps before creating the map.
gmaps.configure(api_key=api_key)
In [26]:
# The (lat, lon) tuples, one per station, used as the heatmap's points.
locations = df_stations.loc[:, 'lat-long']
In [27]:
# Create the gmaps map object that the heatmap layer is added to.
m = gmaps.Map()
In [28]:
# Build a heatmap layer from the station coordinates and attach it to the map.
heatmap = gmaps.heatmap_layer(locations)
m.add_layer(heatmap)
In [29]:
# Display the map as the cell's output.
m
In [ ]: