In [10]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline
In [69]:
# Read each pedestrian-count CSV, then turn its 'datetime' column into the
# frame's DatetimeIndex. pop() removes the column while handing it over, so
# the timestamps end up only in the index.
bridge = pd.read_csv('pedestrian_count_bridge.csv')
bridge.index = pd.DatetimeIndex(bridge.pop('datetime'))

station = pd.read_csv('pedestrian_count_station.csv')
station.index = pd.DatetimeIndex(station.pop('datetime'))
In [70]:
# Same load done in one step by read_csv: the first CSV column is parsed as
# dates and used as the index of each frame.
bridge, station = (
    pd.read_csv(csv_path, index_col=0, parse_dates=[0])
    for csv_path in ('pedestrian_count_bridge.csv',
                     'pedestrian_count_station.csv')
)
In [71]:
# Preview the first five rows of the station frame.
station.head(n=5)
Out[71]:
In [21]:
# Preview the first five rows of the bridge frame.
bridge.head(n=5)
Out[21]:
In [27]:
# Combine both frames column-wise on their indexes; the inner join keeps
# only index values present in both bridge and station.
all_dat = bridge.join(other=station, how='inner')
In [72]:
# Preview the first five rows of the joined frame.
all_dat.head(n=5)
Out[72]:
In [73]:
# Line plot of every bridge column against the index.
bridge.plot(kind='line')
Out[73]:
In [74]:
# Daily totals of the bridge columns, plotted as lines.
# resample(..., how='sum') was deprecated in pandas 0.18 and later removed;
# the aggregation is now called on the resampler object.
bridge.resample('D').sum().plot()
Out[74]:
In [75]:
# Monthly totals of the bridge columns, one subplot per column.
# resample(..., how='sum') was deprecated in pandas 0.18 and later removed;
# the aggregation is now called on the resampler object.
bridge.resample('M').sum().plot(subplots=True, figsize=(12, 10))
Out[75]:
In [76]:
# Column totals from the start of 2014 onwards, smallest first.
# Series.sort(inplace=False) was removed from pandas; sort_values() returns
# the sorted copy. .loc makes the row (label) slice explicit.
all_dat.loc['2014':].sum().sort_values()
Out[76]:
In [77]:
# Column totals for June 2014, smallest first.
# Rows are selected through .loc (partial-string row selection via plain
# df[...] is no longer supported) and Series.sort() is replaced by
# sort_values().
# Equivalent boolean mask:
#   all_dat[(all_dat.index.year == 2014) & (all_dat.index.month == 6)]
# (filtering on month alone would also include June of every other year).
all_dat.loc['2014-06'].sum().sort_values()
Out[77]:
In [78]:
# Column totals restricted to rows stamped in the 08:00 hour on weekdays
# (index.weekday 0-4 is Monday-Friday), smallest first.
# Series.sort(inplace=False) was removed from pandas; use sort_values().
all_dat[(all_dat.index.weekday < 5)
        & (all_dat.index.hour == 8)].sum().sort_values()
Out[78]:
In [79]:
# 'Flagstaff Station' counts for July 2014, summed by hour of day and shown
# as a bar chart.
# .ix was removed from pandas; a single .loc call selects the rows (partial
# date string) and the column together, avoiding chained indexing.
dt = station.loc['2014-07', 'Flagstaff Station']
dt.groupby(dt.index.hour).sum().plot(kind='bar')
Out[79]:
In [93]:
# Percentage change in each column's annual total from 2012 to 2013:
#   (total_2013 - total_2012) / total_2012 * 100
# The annual totals are computed once and reused. resample(..., how='sum')
# was removed from pandas, so the aggregation is called on the resampler,
# and the year rows are selected through .loc.
annual_totals = all_dat.resample('A').sum()
changes = (annual_totals.diff().loc['2013'].values
           / annual_totals.loc['2012'].values) * 100
pd.DataFrame(changes, columns=all_dat.columns)
Out[93]:
In [100]:
# Sum across all columns per row, total those sums by day of week
# (0 = Monday), and rank the days from busiest to quietest.
# Series.sort(inplace=False, ...) was removed from pandas; use sort_values().
all_dat.sum(axis=1).groupby(all_dat.index.dayofweek).sum().sort_values(ascending=False)
Out[100]:
In [129]:
# Collapse the joined frame to one row per calendar date, summing each column.
day_dat = all_dat.groupby(by=all_dat.index.date).sum()
In [133]:
# For every column, report the date with the highest daily total and that
# total, as tab-separated text: date, right-aligned column name, value.
# print is called as a function so the cell runs on Python 3 as well as
# Python 2 (a single parenthesised argument prints identically on both).
for column in day_dat.columns:
    peak_day = day_dat[column].idxmax()
    peak_total = day_dat[column].max()
    print('{}\t{:>35}\t{:9.2f}'.format(peak_day, column, peak_total))
In [158]:
# Box plot of the daily totals, one box per column.
# plt (matplotlib.pyplot) and np come from the notebook's import cell; the
# former in-cell `import pylab as plt` bound the discouraged pylab namespace
# to the conventional pyplot alias.
fig, ax = plt.subplots(figsize=(12, 10))
# The return value is not needed; assigning it to `_` would overwrite
# IPython's "last output" variable.
day_dat.boxplot(grid=False, ax=ax)
# Cap the y-axis at the 82.5th percentile of all daily totals (hand-picked
# cut-off), so values above it fall outside the visible range.
ax.set_ylim(0, np.percentile(day_dat.values.flatten(), 82.5))
ax.set_xticklabels(day_dat.columns, rotation=90)
Out[158]:
In [146]:
# 90th percentile over every daily total in the frame (all columns pooled).
np.percentile(day_dat.values.ravel(), q=90)
Out[146]:
In [ ]: