capital_bikeshare


Capital Bikeshare


In [5]:
%matplotlib inline
import re, os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns; sns.set()
from IPython.display import display

In [6]:
import sys

# Insert the parent of sys.path[0] at index 1 of the module search path before
# importing the project helpers (presumably so the sibling `utils` package
# resolves when the notebook runs from its own sub-directory; confirm layout).
sys.path.insert(1, os.path.join(sys.path[0], '..'))

from utils.bikeshare import (
    download_bikeshare_data,
    get_bikeshare_filename,
    get_year_quarter_tuples,
)

In [7]:
# Configuration: the calendar years to fetch and the directory holding the CSVs.
years = [2015, 2016]
data_folder = "../data/bikeshare/"

# Create the target directory up front; exist_ok means an existing directory
# is not an error, so the cell can be re-run.
os.makedirs(data_folder, exist_ok=True)

# Announce the run, followed by a single blank line.
print(
    "Downloading Capital Bikeshare Data for {years} to {folder}".format(
        years=years,
        folder=data_folder,
    ),
    end="\n\n",
)

# Pass every (year, quarter) pair produced by the helper to the downloader.
for year, quarter in get_year_quarter_tuples(years):
    download_bikeshare_data(year, quarter, data_folder)


Downloading Capital Bikeshare Data for [2015, 2016] to ../data/bikeshare/

Data Exists: 2015 Q1 | ../data/bikeshare/2015-Q1-cabi-trip-history-data.csv
Data Exists: 2015 Q2 | ../data/bikeshare/2015-Q2-cabi-trip-history-data.csv
Data Exists: 2015 Q3 | ../data/bikeshare/2015-Q3-cabi-trip-history-data.csv
Data Exists: 2015 Q4 | ../data/bikeshare/2015-Q4-cabi-trip-history-data.csv
Data Exists: 2016 Q1 | ../data/bikeshare/2016-Q1-cabi-trip-history-data.csv

In [8]:
# Load the 2016 Q1 trip history. The path is built from `data_folder` (set in
# the download cell) instead of repeating the directory as a literal, so the
# download and load locations cannot drift apart.
df = pd.read_csv(os.path.join(data_folder, "2016-Q1-cabi-trip-history-data.csv"))

In [9]:
# Preview the first five rows to check column names and value formats.
df.head(n=5)


Out[9]:
Duration (ms) Start date End date Start station number Start station End station number End station Bike number Member Type
0 301295 3/31/2016 23:59 4/1/2016 0:04 31280 11th & S St NW 31506 1st & Rhode Island Ave NW W00022 Registered
1 557887 3/31/2016 23:59 4/1/2016 0:08 31275 New Hampshire Ave & 24th St NW 31114 18th St & Wyoming Ave NW W01294 Registered
2 555944 3/31/2016 23:59 4/1/2016 0:08 31101 14th & V St NW 31221 18th & M St NW W01416 Registered
3 766916 3/31/2016 23:57 4/1/2016 0:09 31226 34th St & Wisconsin Ave NW 31214 17th & Corcoran St NW W01090 Registered
4 139656 3/31/2016 23:57 3/31/2016 23:59 31011 23rd & Crystal Dr 31009 27th & Crystal Dr W21934 Registered

In [1]:
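# TODO: draft plots of ride counts by hour of day, weekends (dayofweek >= 5)
# versus weekdays (dayofweek < 5).
# Not runnable as-is against the frame previewed above: they expect a datetime
# column named `start_date`, while the loaded column is "Start date". Rename the
# column and parse it as a datetime before enabling these lines.
# df[(df.start_date.dt.dayofweek>=5)].start_date.dt.hour.value_counts().sort_index().plot()
# df[(df.start_date.dt.dayofweek<5)].start_date.dt.hour.value_counts().sort_index().plot()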

In [ ]: