In [2]:
import seaborn as sns
import metapack as mp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display 
import geopandas as gpd
from shapely.geometry import Point
import re

import sys
!{sys.executable} -m pip --quiet install python-slugify

%matplotlib inline
sns.set_context('notebook')
mp.jupyter.init()

In [3]:
pkg = mp.jupyter.open_package()
#pkg = mp.jupyter.open_source_package()
pkg


Out[3]:

San Diego Smart Parking Meters

sandiego.gov-parking_meters-1 Last Update: 2019-03-01T04:13:49

Transactions from downtown parking meters

Documentation Links

Contacts

Resources

References


In [4]:
%%time
df15 = pkg.reference('parking_transactions_2015').dataframe()
df16 = pkg.reference('parking_transactions_2016').dataframe()
df17 = pkg.reference('parking_transactions_2017').dataframe()
df18 = pkg.reference('parking_transactions_2018').dataframe()


CPU times: user 1min 54s, sys: 39.1 s, total: 2min 34s
Wall time: 5min 18s

In [ ]:
%%time
from functools import reduce
all_dfs = [df15, df16, df17, df18]

# Just run this once to get the second columns list. 
#columns = reduce(lambda a, b: a | set(b.columns), all_dfs, set())

columns = [
 'uuid',
 'pole_id',
 'trans_start',
 'meter_expire',
 'meter_type',
 'pay_method',
 'trans_amt',
 'smartcard_id'
]

for df in all_dfs:
    for col in columns:
        if col not in list(df.columns):
            df[col] = None
            
aligned_dfs = [ df[columns] for df in all_dfs ]
df = pd.concat(aligned_dfs)

df['trans_start'] = pd.to_datetime(df['trans_start'])
df['meter_expire'] = pd.to_datetime(df['meter_expire'])
df['n'] = 1

df['pole_id'] = df.pole_id.astype('category')
df['meter_type'] = df.meter_type.astype('category')
df['pay_method'] = df.pay_method.astype('category')
df['trans_amt'] = df.trans_amt.astype('int32')
df['duration'] = ((df.meter_expire - df.trans_start).dt.seconds/60).round(0).astype('int32')
#df.drop(columns=['uuid', 'smartcard_id', 'duration'], inplace=True)
t = df.merge(locations[['pole_id','pole_index']])

transactions = t[['pole_index', 'trans_start', 'duration', 'trans_amt', 'meter_type', 'pay_method'   ]]

transactions.head()

In [191]:


In [209]:


In [211]:
transactions.memory_usage()


Out[211]:
Index          305115664
pole_index     305115664
trans_start    305115664
duration       152557832
trans_amt      152557832
meter_type      38139554
pay_method      38139658
dtype: int64

In [212]:
transactions.dtypes


Out[212]:
pole_index              int64
trans_start    datetime64[ns]
duration                int32
trans_amt               int32
meter_type           category
pay_method           category
dtype: object

In [218]:
t = transactions.set_index('trans_start').groupby(['pole_index', pd.Grouper(freq='15Min')]).count().reset_index()

In [219]:
t.head()


Out[219]:
pole_index trans_start duration trans_amt meter_type pay_method
0 1 2015-01-01 12:15:00 1 1 1 1
1 1 2015-01-01 12:45:00 1 1 1 1
2 1 2015-01-01 15:15:00 1 1 1 1
3 1 2015-01-01 15:45:00 1 1 1 1
4 1 2015-01-01 17:15:00 1 1 1 1

In [223]:


In [ ]:


In [307]:
from matplotlib.pyplot import xticks, xlabel, suptitle
t1 = t.groupby([t.trans_start.dt.hour, t.trans_start.dt.dayofweek]).sum()
fig, ax = plt.subplots(figsize=(8, 4)) 
ax = sns.heatmap(t1[['duration']].unstack(), ax=ax, cmap="BrBG");
locs, labels = xticks()
xticks(locs, [ f'{e}' for e in range(24)]);
xlabel("Hour");
suptitle("Parking Flow By Hour of Day and Month of Data")


Out[307]:
Text(0.5, 0.98, 'Parking Flow By Hour of Day and Month of Data')

In [228]:
t2 = t.groupby([t.trans_start.dt.month, t.trans_start.dt.hour]).sum()

fig, ax = plt.subplots(figsize=(8, 4)) 
ax = sns.heatmap(t2[['duration']].unstack(), ax=ax, cmap="BrBG");



In [314]:
#t = df[ (df.trans_start.dt.hour > 7) ]
t = df[ (df.pole_id == 'UV-1400S')  ]
t3 = t.groupby([t.trans_start.dt.hour, t.trans_start.dt.dayofweek]).count()

fig, ax = plt.subplots(figsize=(8, 4)) 
ax = sns.heatmap(t3[['n']].unstack(), ax=ax, cmap="viridis");



In [328]:
t = df[ (df.pole_id == 'UV-1400S')  ].set_index('trans_start').sort_index()
t.head()


Out[328]:
uuid meter_type pole_id trans_amt pay_method meter_expire n
trans_start
2018-01-02 10:02:38 MSUV1400S18010210023835 MS UV-1400S 35 CASH 2018-01-02 10:19:26 1
2018-01-02 10:03:27 MSUV1400S180102100327250 MS UV-1400S 250 CREDIT CARD 2018-01-02 12:03:27 1
2018-01-02 10:16:42 MSUV1400S180102101642100 MS UV-1400S 100 CASH 2018-01-02 11:04:42 1
2018-01-02 10:18:09 MSUV1400S18010210180975 MS UV-1400S 75 CASH 2018-01-02 10:54:09 1
2018-01-02 10:57:47 MSUV1400S18010210574750 MS UV-1400S 50 CREDIT CARD 2018-01-02 11:21:47 1

In [329]:
df.meter_type.value_counts()


Out[329]:
SS    9484110
MS      40279
Name: meter_type, dtype: int64

In [ ]: