Imports


In [1]:
import numpy as np
import pandas as pd

# Let pandas display up to 60 columns before truncating wide frames.
pd.set_option('display.max_columns', 60)

Data

Status


In [2]:
# Load the master station-status table from CSV into memory.
df_status = pd.read_csv('DATA/babs_master/status_master.csv')

In [3]:
# Discard the dock-count column; only bike availability is used below.
df_status = df_status.drop('docks_available', axis='columns')

In [4]:
# Preview the first rows to check which columns remain.
df_status.head()


Out[4]:
station_id bikes_available time
0 2 2 2013/08/29 12:06:01
1 2 2 2013/08/29 12:07:01
2 2 2 2013/08/29 12:08:01
3 2 2 2013/08/29 12:09:01
4 2 2 2013/08/29 12:10:01

In [5]:
# (rows, columns) of the status table at this point.
df_status.shape


Out[5]:
(71984434, 3)

In [6]:
# Convert the 'time' column from text to datetime values so it can
# serve as a time index for resampling.
df_status['time'] = pd.to_datetime(df_status['time'])

In [7]:
# Move 'time' out of the columns and into the index (set_index drops the
# source column by default), which resample() requires.
df_status = df_status.set_index('time')

Hourly


In [8]:
# Downsample each station's readings to hourly means.
# - The aggregation is chained as .mean(): the resample(how=...) keyword was
#   deprecated and later removed from pandas, so the old call no longer runs.
# - '60min' is used instead of the deprecated 'T' frequency alias.
# - Selecting [['bikes_available']] keeps the result a one-column DataFrame
#   indexed by (station_id, time) and keeps the grouping key itself out of
#   the aggregation.
# Hours with no readings for a station come out as NaN.
df_status_hourly = (
    df_status
    .groupby('station_id')[['bikes_available']]
    .resample('60min')
    .mean()
)

In [9]:
# Round the hourly mean to zero decimal places (values stay floating point).
df_status_hourly['bikes_available'] = df_status_hourly['bikes_available'].round(0)

In [10]:
# Turn the index levels back into ordinary columns so the table is flat.
df_status_hourly = df_status_hourly.reset_index()

In [11]:
# Preview the hourly table after the index has been reset.
df_status_hourly.head()


Out[11]:
station_id time bikes_available
0 2 2013-08-29 12:00:00 2
1 2 2013-08-29 13:00:00 3
2 2 2013-08-29 14:00:00 2
3 2 2013-08-29 15:00:00 2
4 2 2013-08-29 16:00:00 2

In [12]:
# (rows, columns) before rows with missing values are removed.
df_status_hourly.shape


Out[12]:
(1208635, 3)

In [13]:
# Remove every row that contains a missing value.
df_status_hourly = df_status_hourly.dropna()

In [14]:
# (rows, columns) after rows with missing values were removed.
df_status_hourly.shape


Out[14]:
(1204800, 3)

Output


In [15]:
# Write the hourly table to CSV; index=False leaves the row index out of the file.
df_status_hourly.to_csv('DATA/babs_master/status_master_60m.csv', index=False)

In [ ]: