In [2]:
import seaborn as sns
import metapack as mp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
import geopandas as gpd
from shapely.geometry import Point
import re
import sys
!{sys.executable} -m pip --quiet install python-slugify
%matplotlib inline
sns.set_context('notebook')
mp.jupyter.init()
In [3]:
pkg = mp.jupyter.open_package()
#pkg = mp.jupyter.open_source_package()
pkg
Out[3]:
In [4]:
%%time
df15 = pkg.reference('parking_transactions_2015').dataframe()
df16 = pkg.reference('parking_transactions_2016').dataframe()
df17 = pkg.reference('parking_transactions_2017').dataframe()
df18 = pkg.reference('parking_transactions_2018').dataframe()
In [ ]:
%%time
from functools import reduce
all_dfs = [df15, df16, df17, df18]
# Just run this once to get the second columns list.
#columns = reduce(lambda a, b: a | set(b.columns), all_dfs, set())
columns = [
'uuid',
'pole_id',
'trans_start',
'meter_expire',
'meter_type',
'pay_method',
'trans_amt',
'smartcard_id'
]
for df in all_dfs:
for col in columns:
if col not in list(df.columns):
df[col] = None
aligned_dfs = [ df[columns] for df in all_dfs ]
df = pd.concat(aligned_dfs)
df['trans_start'] = pd.to_datetime(df['trans_start'])
df['meter_expire'] = pd.to_datetime(df['meter_expire'])
df['n'] = 1
df['pole_id'] = df.pole_id.astype('category')
df['meter_type'] = df.meter_type.astype('category')
df['pay_method'] = df.pay_method.astype('category')
df['trans_amt'] = df.trans_amt.astype('int32')
df['duration'] = ((df.meter_expire - df.trans_start).dt.seconds/60).round(0).astype('int32')
#df.drop(columns=['uuid', 'smartcard_id', 'duration'], inplace=True)
t = df.merge(locations[['pole_id','pole_index']])
transactions = t[['pole_index', 'trans_start', 'duration', 'trans_amt', 'meter_type', 'pay_method' ]]
transactions.head()
In [191]:
In [209]:
In [211]:
transactions.memory_usage()
Out[211]:
In [212]:
transactions.dtypes
Out[212]:
In [218]:
t = transactions.set_index('trans_start').groupby(['pole_index', pd.Grouper(freq='15Min')]).count().reset_index()
In [219]:
t.head()
Out[219]:
In [223]:
In [ ]:
In [307]:
from matplotlib.pyplot import xticks, xlabel, suptitle
t1 = t.groupby([t.trans_start.dt.hour, t.trans_start.dt.dayofweek]).sum()
fig, ax = plt.subplots(figsize=(8, 4))
ax = sns.heatmap(t1[['duration']].unstack(), ax=ax, cmap="BrBG");
locs, labels = xticks()
xticks(locs, [ f'{e}' for e in range(24)]);
xlabel("Hour");
suptitle("Parking Flow By Hour of Day and Month of Data")
Out[307]:
In [228]:
t2 = t.groupby([t.trans_start.dt.month, t.trans_start.dt.hour]).sum()
fig, ax = plt.subplots(figsize=(8, 4))
ax = sns.heatmap(t2[['duration']].unstack(), ax=ax, cmap="BrBG");
In [314]:
#t = df[ (df.trans_start.dt.hour > 7) ]
t = df[ (df.pole_id == 'UV-1400S') ]
t3 = t.groupby([t.trans_start.dt.hour, t.trans_start.dt.dayofweek]).count()
fig, ax = plt.subplots(figsize=(8, 4))
ax = sns.heatmap(t3[['n']].unstack(), ax=ax, cmap="viridis");
In [328]:
t = df[ (df.pole_id == 'UV-1400S') ].set_index('trans_start').sort_index()
t.head()
Out[328]:
In [329]:
df.meter_type.value_counts()
Out[329]:
In [ ]: