Station-level User Summaries

This notebook generates the station-level summaries of the split between customers/subscribers (daily pass users vs. Divvy Members) and the gender of the subscriber trips. This is the data underlying the donut charts in the visualization.


In [1]:
from __future__ import print_function, division
import pandas as pd

In [2]:
# Standard data import
trips = pd.read_csv('../data/Divvy_Stations_Trips_2013/Divvy_Trips_2013.csv')
stations = pd.read_csv('../data/ivvy_Stations_Trips_2013/Divvy_Stations_2013.csv')
# Convert to numeric
trips.from_station_id = trips.from_station_id.convert_objects(convert_numeric=True)
trips.to_station_id = trips.to_station_id.convert_objects(convert_numeric=True)


/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.py:1070: DtypeWarning: Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.
  data = self._reader.read(nrows)

In [3]:
# Group by from_station_name and gender
# Convert pandas groupby into a DataFrame
gf = pd.DataFrame(trips.groupby(by=['from_station_name','gender'])['trip_id'].count())
# Rename the summary column
gf.rename(columns={0:'trips'}, inplace=True)
# Write to csv
gf.to_csv('../data/gender.csv')

In [4]:
# Repeat same steps but grouping by usertype instead of gender
uf = pd.DataFrame(trips.groupby(by=['from_station_name','usertype'])['trip_id'].count())
uf.rename(columns={0:'trips'}, inplace=True)
uf.to_csv('../data/user.csv')

In [ ]: