In [1]:
import numpy as np
import pandas as pd
# Let wide frames show up to 60 columns before pandas truncates the display.
pd.options.display.max_columns = 60
In [2]:
# Read the master station-status CSV into a DataFrame.
status_csv_path = 'DATA/babs_master/status_master.csv'
df_status = pd.read_csv(status_csv_path)
In [3]:
# Discard the 'docks_available' column; the frame is modified in place.
df_status.drop(columns=['docks_available'], inplace=True)
In [4]:
# Show the first few rows as a quick sanity check of the loaded frame.
df_status.head()
Out[4]:
In [5]:
# Display the frame's (rows, columns) dimensions.
df_status.shape
Out[5]:
In [6]:
# Convert the 'time' column to datetime64 values so it can drive resampling.
parsed_time = pd.to_datetime(df_status['time'])
df_status['time'] = parsed_time
In [7]:
# Promote 'time' to the index (the column is removed, which is the default),
# mutating the frame in place.
df_status.set_index(keys='time', inplace=True)
In [8]:
# Downsample each station's readings to 60-minute means.
#
# * The `resample(rule, how='mean')` spelling was removed from pandas; the
#   aggregation is now a method call on the resampler object.
# * '60min' replaces the deprecated 'T' minute alias.
# * The value columns are selected explicitly so the 'station_id' grouping key
#   is never averaged into the result as a regular column. On pandas versions
#   that keep the grouping column, the later reset_index() would otherwise
#   fail because 'station_id' would exist both as an index level and a column.
#
# The result is indexed by (station_id, time).
value_columns = [col for col in df_status.columns if col != 'station_id']
df_status_hourly = (
    df_status
    .groupby('station_id')[value_columns]
    .resample('60min')
    .mean()
)
In [9]:
# Round the averaged bike counts to zero decimal places (values stay float).
hourly_bikes = df_status_hourly['bikes_available']
df_status_hourly['bikes_available'] = hourly_bikes.round(0)
In [10]:
# Move the index levels back into ordinary columns, in place.
df_status_hourly.reset_index(drop=False, inplace=True)
In [11]:
# Show the first few rows of the hourly frame as a quick sanity check.
df_status_hourly.head()
Out[11]:
In [12]:
# (rows, columns) of the hourly frame before rows with missing values are dropped.
df_status_hourly.shape
Out[12]:
In [13]:
# Remove every row that contains at least one missing value, in place.
df_status_hourly.dropna(axis=0, how='any', inplace=True)
In [14]:
# (rows, columns) of the hourly frame after rows with missing values were dropped.
df_status_hourly.shape
Out[14]:
In [15]:
# Persist the hourly frame to CSV without writing the row index.
hourly_csv_path = 'DATA/babs_master/status_master_60m.csv'
df_status_hourly.to_csv(path_or_buf=hourly_csv_path, index=False)
In [ ]: