In [1]:
import os
from glob import glob
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Directory (relative to this notebook) holding the raw counter CSV exports.
input_folder = "../data/zhbikes"
In [3]:
# glob() yields matches in arbitrary, filesystem-dependent order; sorting makes
# the row order of the concatenated frame reproducible between runs.
input_files = sorted(glob('{}/*.csv'.format(input_folder)))
if not input_files:
    # Fail here with a clear message instead of a cryptic error further down
    # when an empty list is handed to pd.concat.
    raise FileNotFoundError('No CSV files found in {}'.format(input_folder))
In [4]:
# Columns kept from each raw file. `usecols` avoids parsing the columns that
# would be thrown away anyway; the trailing `[input_columns]` selection pins
# the column order, which `usecols` alone does not guarantee.
input_columns = ['fk_zaehler', 'datum', 'velo_in', 'velo_out', 'fuss_in', 'fuss_out', 'objectid']
dataframes = [
    pd.read_csv(file, usecols=input_columns)[input_columns]
    for file in input_files
]
In [5]:
# Stack the per-file frames into one. Each file carries its own 0..n index, so
# build a fresh RangeIndex rather than keeping duplicated row labels.
df = pd.concat(dataframes, ignore_index=True)
In [6]:
# (rows, columns) of the combined raw frame.
df.shape
Out[6]:
In [7]:
# Peek at the first rows of the combined raw frame.
df.head()
Out[7]:
In [8]:
# Column dtypes as inferred by read_csv.
df.dtypes
Out[8]:
In [9]:
# Discard the two 'fuss_*' columns; only the 'velo_*' counts are used below.
df.drop(columns=['fuss_in', 'fuss_out'], inplace=True)
In [10]:
# Remove every row that has a missing value in any remaining column.
df.dropna(axis=0, how='any', inplace=True)
In [11]:
# Rename by name rather than by position: a positional `df.columns = [...]`
# silently mislabels data if the upstream column selection ever changes.
df = df.rename(columns={'fk_zaehler': 'counting_station', 'datum': 'datetime'})
In [12]:
# Turn each raw 'datetime' value into a pandas Timestamp, one element at a time.
parsed_datetimes = df['datetime'].apply(pd.Timestamp)
df['datetime'] = parsed_datetimes
In [13]:
# Check the frame after dropping, renaming and parsing the datetime column.
df.head()
Out[13]:
In [14]:
# Distinct counting-station identifiers, in order of first appearance.
stations = df.loc[:, 'counting_station'].unique()
In [15]:
# Number of distinct counting stations.
stations.shape
Out[15]:
In [16]:
def sum_by_week(station, data=None):
    """Aggregate one counting station's bike counts into weekly totals.

    Parameters
    ----------
    station
        Value of the ``counting_station`` column whose rows are aggregated.
    data : pandas.DataFrame, optional
        Frame with ``counting_station``, ``datetime``, ``velo_in`` and
        ``velo_out`` columns. When omitted, the notebook-level ``df`` is used,
        so existing ``sum_by_week(station)`` calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per weekly bin with the columns ``datetime``, ``velo_in``
        (weekly sum), ``velo_out`` (weekly sum, negated), ``velo_all``
        (sum of both directions) and ``counting_station``.
    """
    if data is None:
        # Backward-compatible fallback to the notebook-global frame.
        data = df

    df_station = data[data['counting_station'] == station]
    aggregated = (
        df_station
        .set_index('datetime')
        .resample('1W')
        .agg({'velo_in': 'sum', 'velo_out': 'sum'})
        .reset_index()
    )
    # The total must be computed before the sign of 'velo_out' is flipped.
    aggregated['velo_all'] = aggregated['velo_out'] + aggregated['velo_in']
    # Negated so that outgoing traffic is drawn below the zero line.
    aggregated['velo_out'] = -aggregated['velo_out']
    aggregated['counting_station'] = station
    return aggregated
In [17]:
# One weekly-aggregated frame per station, stacked into a single long frame
# with a fresh 0..n index.
dataframes_weekly = list(map(sum_by_week, stations))
df_weekly = pd.concat(dataframes_weekly, ignore_index=True)
In [18]:
# Small multiples: one panel per counting station, four panels per row.
sns.set(font_scale=2)
grid = sns.FacetGrid(
    df_weekly,
    col="counting_station",
    hue="counting_station",
    palette="tab20c",
    col_wrap=4,
    height=3,
    aspect=3,
)
# Incoming counts are positive and outgoing counts were negated, so the two
# lines sit on opposite sides of the dotted zero line.
for count_column in ("velo_in", "velo_out"):
    grid.map(plt.plot, "datetime", count_column)
grid.map(plt.axhline, y=0, ls=":", c=".5")
grid.set_xticklabels(rotation=30)
Out[18]:
In [19]:
# Single overview chart: weekly total (both directions) per station.
sns.set(rc={'figure.figsize': (15, 10)})
sns.lineplot(
    data=df_weekly,
    x="datetime",
    y="velo_all",
    hue="counting_station",
)
Out[19]:
In [ ]: