In [1]:
from __future__ import print_function, division
import pandas as pd
import locale
import datetime
In [5]:
# Load the raw 2013 Divvy trip and station tables from the data directory.
trips = pd.read_csv('../data/Divvy_Stations_Trips_2013/Divvy_Trips_2013.csv')
stations = pd.read_csv('../data/Divvy_Stations_Trips_2013/Divvy_Stations_2013.csv')

# Coerce the station id columns to numeric; values that cannot be parsed
# become NaN. pd.to_numeric(errors='coerce') is the supported replacement for
# Series.convert_objects(convert_numeric=True), which was deprecated and later
# removed from pandas.
trips['from_station_id'] = pd.to_numeric(trips['from_station_id'], errors='coerce')
trips['to_station_id'] = pd.to_numeric(trips['to_station_id'], errors='coerce')

# Convert trip duration to float.
# NOTE(review): presumably the raw column holds strings with ',' thousands
# separators (the previous code parsed it with locale.atof) -- verify against
# the CSV. The commas are stripped explicitly so the result does not depend on
# the locale of the machine running the notebook (under a C/POSIX locale,
# locale.atof('1,118') raises ValueError), and the process-wide
# locale.setlocale call is no longer needed.
trips['tripduration'] = (
    trips['tripduration']
    .astype(str)
    .str.replace(',', '')
    .astype(float)
)

# Parse the start/stop timestamp columns into pandas datetime values.
trips['starttime'] = pd.to_datetime(trips['starttime'])
trips['stoptime'] = pd.to_datetime(trips['stoptime'])
In [6]:
# Add a 'mth' column holding the calendar month number of each trip's start
# time. The vectorized .dt accessor is used instead of a per-row Python lambda;
# it requires 'starttime' to already be a datetime column.
trips['mth'] = trips['starttime'].dt.month
In [7]:
# Count trips per (from_station_name, mth) pair and store the counts in a
# one-column data frame indexed by those two keys.
# The count Series is named after the counted column ('trip_id'), so renaming
# a column labelled 0 would silently do nothing; to_frame(name=...) sets the
# column label to 'mthlytrips' regardless of the Series name.
mf = (
    trips
    .groupby(by=['from_station_name', 'mth'])['trip_id']
    .count()
    .to_frame(name='mthlytrips')
)
In [10]:
# Write the monthly trip-count frame (including its index) to a CSV file
# in the data directory.
mf.to_csv(path_or_buf='../data/mnthlytrips.csv')
In [ ]: