Monthly Staion-level Summaries

This notebook generates the monthly time series of total trips by station. It is used in the bar chart component of the visualization.

This notebook demonstrates the usage of some of the pandas datetime functionality.


In [1]:
from __future__ import print_function, division
import pandas as pd
import locale
import datetime

In [5]:
trips = pd.read_csv('../data/Divvy_Stations_Trips_2013/Divvy_Trips_2013.csv')
stations = pd.read_csv('../data/Divvy_Stations_Trips_2013/Divvy_Stations_2013.csv')
# Convert to numeric
trips.from_station_id = trips.from_station_id.convert_objects(convert_numeric=True)
trips.to_station_id = trips.to_station_id.convert_objects(convert_numeric=True)

# Convert trip duration to numeric
locale.setlocale(locale.LC_NUMERIC, '')
trips.tripduration = trips.tripduration.apply(locale.atof)

# Convert date columns to pandas datetime objects
trips.starttime = pd.to_datetime(trips.starttime)
trips.stoptime = pd.to_datetime(trips.stoptime)


/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.py:1070: DtypeWarning: Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.
  data = self._reader.read(nrows)

In [6]:
# Create a month column by applying the datetime month attribute
trips['mth'] = trips.starttime.apply(lambda x: x.month)

In [7]:
# Group by from_station_name and month and generate a data frame
mf = pd.DataFrame(trips.groupby(by=['from_station_name', 'mth'])['trip_id'].count())
# Rename summary column to reflect contents
mf.rename(columns={0: 'mthlytrips'}, inplace=True)

In [10]:
# And that's it
mf.to_csv('../data/mnthlytrips.csv')

In [ ]: