In [1]:
import seaborn as sns
import metapack as mp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display 

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Select seaborn's 'notebook' plotting context for all figures in this notebook.
sns.set_context('notebook')
# Call metapack's Jupyter init hook before the package is opened in the next cell
# (its exact effects are not visible from this notebook).
mp.jupyter.init()

In [2]:
# Open the metapack data package; no arguments are given, so the package is
# presumably resolved from the notebook's own location -- TODO confirm.
pkg = mp.jupyter.open_package()
# Alternative opener, left disabled by the author:
#pkg = mp.jupyter.open_source_package()
# Bare expression: the package's rich representation becomes the cell output.
pkg


Out[2]:

San Diego Parking Time Series

sandiego.gov-cityiq_parking-2 Last Update: 2019-02-20T04:10:31

15 minute interval parking utilization for 1600 parking zones in San Diego city.

This dataset is compiled from parking events scraped from the San Diego CityIQ smart streetmap system, via the cityiq Python package. The dataset is compiled from PKIN and PKOUT events between the dates of Sept 2018 and Feb 2019 for the whole San Diego system.

The dataset is heavily processed to eliminate duplicate events because there are many spurious events, but an excess of PKIN events. When computing the number of cars parked in all parking zones, the excess of PKIN events results in about 60,000 extra cars per month. These issues are explored in a Jupyter Notebook.

The records in this dataset reference parking zones. More information, including geographic positions, is available in the CityIQ Objects dataset.

Processing

These data were produced with these programs:

$ pip install cityiq
$ ciq_config -w
$ # Edit .cityiq-config.yaml with client-id and secret
# Scrape PKIN and PKOUT from Sept 2018 to present
$ ciq_events -s -e PKIN -e PKOUT -t 20190901
# Split event dump in to event-location csv files
$ ciq_events -S
# Deduplicate and normalize
$ ciq_events -n

The last step, deduplication and normalization, involves these steps:

  • Group events by event type, location and 1 second period and select only 1 record from each group
  • Collect runs of events of one type and select only the first record of the run, up to a run of 4 minutes long
  • For each location, compute the cumulative sum of in and outs ( calculating the number of cars in the zone ) then create a rolling 2-day average. Subtract off the average.

The third step is demonstrated in this image:

The blue line is the original utilization for a single location, showing the larger number of PKIN events than PKOUT events. The red line is the 2-day rolling average, and the green line is after subtracting the 2-day rolling average.

In the final dataset, the data for the blue line is in the cs column, which is created from the cumulative sum of the delta column. The green line is the data in the cs_norm column, which is differentiated to create the delta_norm column.

For most purposes you should use cs_norm and delta_norm.

Contacts

Resources

References

  • parking_events. Parking events
  • assets. Data package with metadata about the parking zone locations.
  • locations. Data package with metadata about the parking zone locations.

In [3]:
# Materialise the three package references as DataFrames, in the same order
# as before (assets, locations, parking events).
assets, locations, prk = (
    pkg.reference(ref_name).dataframe()
    for ref_name in ('assets', 'locations', 'parking_events')
)

# Lower-case the parking-event column labels.
prk.columns = [label.lower() for label in prk.columns]

In [4]:
# Join every parking event to its location record on the shared 'locationuid' key.
prk_loc = pd.merge(prk, locations, on='locationuid')
# Independent copy of the Downtown rows, so columns can be added to it later
# without touching prk_loc.
df = prk_loc.loc[prk_loc['community_name'].eq('Downtown')].copy()

In [62]:
# Number of merged event rows per community, showing the 25 largest; used to
# pick which communities to plot below.
prk_loc.community_name.value_counts().head(25)


Out[62]:
Downtown                              9477418
Uptown                                1438554
Greater North Park                    1186693
Mid-City:Eastern Area                  760614
Skyline-Paradise Hills                 454647
Greater Golden Hill                    269607
College Area                           266956
Ocean Beach                            233634
Mid-City:Normal Heights                212063
Pacific Beach                          200115
Mira Mesa                              183092
Mid-City:Kensington-Talmadge           182035
Encanto Neighborhoods,Southeastern     149660
Mid-City:City Heights                  139941
Navajo                                 123328
Carmel Valley                          116767
La Jolla                               103292
University                              92917
Balboa Park                             88510
Clairemont Mesa                         87802
Linda Vista                             79106
Pacific Highlands Ranch                 79034
Peninsula                               77401
Serra Mesa                              63536
Rancho Penasquitos                      62999
Name: community_name, dtype: int64

In [13]:
# Per-community copies of the merged events: Uptown, North Park, Golden Hill.
dfu = prk_loc.loc[prk_loc['community_name'].eq('Uptown')].copy()
dfnp = prk_loc.loc[prk_loc['community_name'].eq('Greater North Park')].copy()
dfgh = prk_loc.loc[prk_loc['community_name'].eq('Greater Golden Hill')].copy()

In [21]:
# Per-community copies of the merged events: Skyline, Mid-City East, Pacific Beach.
dfsky = prk_loc.loc[prk_loc['community_name'].eq('Skyline-Paradise Hills')].copy()
dfmid = prk_loc.loc[prk_loc['community_name'].eq('Mid-City:Eastern Area')].copy()
dfpb = prk_loc.loc[prk_loc['community_name'].eq('Pacific Beach')].copy()

In [63]:
# Per-community copies of the merged events: La Jolla, University, Balboa Park.
dflj = prk_loc.loc[prk_loc['community_name'].eq('La Jolla')].copy()
dfunv = prk_loc.loc[prk_loc['community_name'].eq('University')].copy()
dfprk = prk_loc.loc[prk_loc['community_name'].eq('Balboa Park')].copy()

In [5]:
# Month bucket: the event's calendar date with the day pinned to the 15th,
# so every event in a month shares one key.
df['month'] = df['time'].map(lambda stamp: stamp.date().replace(day=15))
# Direction flag: +1 where delta_norm is strictly positive, -1 everywhere else
# (zero and missing values included).
df['flow'] = np.where(df['delta_norm'] > 0, 1, -1)

In [6]:
# Total normalised flow per (month, hour of day). Only delta_norm is selected
# before summing: aggregating every column also tries to add up the datetime,
# date and string columns, which is wasted work and an error on newer pandas.
t = df.groupby([df.month, df.time.dt.hour])[['delta_norm']].sum()
by_hour = t.unstack()

fig, ax = plt.subplots(figsize=(8, 4))
sns.heatmap(by_hour, ax=ax, cmap="BrBG", center=0)

# Label each tick with the hour of the column it sits on (heatmap ticks sit at
# cell centres, i.e. column index + 0.5) instead of assuming exactly 24 ticks.
hours = by_hour.columns.get_level_values(-1)
tick_pos = [p for p in ax.get_xticks() if 0 <= int(p) < len(hours)]
ax.set_xticks(tick_pos)
ax.set_xticklabels([f'{hours[int(p)]}' for p in tick_pos])
ax.set_xlabel("Hour")
fig.suptitle("Parking Flow By Hour of Day and Month of Data")


Out[6]:
Text(0.5, 0.98, 'Parking Flow By Hour of Day and Month of Data')

In [7]:
# Total normalised flow per (month, time-of-day slot); only delta_norm is
# aggregated so the non-numeric columns are never summed.
t = df.groupby([df.month, df.time.dt.time])[['delta_norm']].sum()

# Draw on a fresh figure. Reusing `ax` from the previous cell paints onto a
# figure that has already been rendered, so this cell would show no heatmap.
fig, ax = plt.subplots(figsize=(12, 4))
# center=0 keeps the diverging BrBG palette neutral at zero net flow, matching
# the other heatmaps in this notebook.
sns.heatmap(t.unstack(), ax=ax, cmap="BrBG", center=0)
ax.set_xlabel("Time")
fig.suptitle("Parking Flow By Time of Day and Month of Data")


Out[7]:
Text(0.5, 0.98, 'Parking Flow By TIme of Day and Month of Data')

In [8]:
# Total normalised flow per (month, time-of-day slot) for the Downtown frame.
# Only delta_norm is aggregated; summing every column would also try to add up
# the datetime, date and string columns. The previously derived 'flow' column
# was never read, so it is no longer computed.
t = df.groupby([df.month, df.time.dt.time])[['delta_norm']].sum()

fig, ax = plt.subplots(figsize=(12, 4))
sns.heatmap(t.unstack(), ax=ax, cmap='BrBG', center=0)
# Label through the axes/figure objects rather than pyplot's "current axes".
ax.set_xlabel("Time")
fig.suptitle("Parking Flow By Time of Day and Month of Data\nDowntown Community")


Out[8]:
Text(0.5, 0.98, 'Parking Flow By Time of Day and Month of Data\nDowntown Community')

In [49]:
def month_time_plot(df, ax = None, community=''):
    """Draw a heatmap of summed ``delta_norm`` by month (rows) and time of day (columns).

    Relies on the notebook-level ``sns`` (seaborn) and ``plt`` (matplotlib.pyplot)
    imports.

    Args:
        df: DataFrame with a datetime-like ``time`` column (the ``.dt`` accessor
            is used on it) and a ``delta_norm`` column. The frame is only read;
            no column is added to it.
        ax: Matplotlib axes to draw on. When None, a new 12x4 inch figure is
            created.
        community: Text shown on the second line of the plot title.

    Returns:
        None. The heatmap is drawn on ``ax`` as a side effect.
    """
    # Month key: the event date pinned to the 15th. Kept as a standalone Series
    # (named 'month') so the caller's frame is not mutated.
    month = df.time.apply(lambda v: v.date().replace(day=15)).rename('month')

    # Aggregate only delta_norm; summing every column would also try to add up
    # the datetime and string columns.
    table = df.groupby([month, df.time.dt.time])[['delta_norm']].sum().unstack()

    if ax is None:
        _, ax = plt.subplots(figsize=(12, 4))
    sns.heatmap(table, ax=ax, cmap='BrBG', center=0)

    # Relabel the ticks on *this* axes (not pyplot's current axes) using the
    # actual time slot under each tick. Heatmap ticks sit at cell centres
    # (column index + 0.5), so int(position) is the column index.
    slots = table.columns.get_level_values(-1)
    tick_pos = [p for p in ax.get_xticks() if 0 <= int(p) < len(slots)]
    ax.set_xticks(tick_pos)
    ax.set_xticklabels(
        [f"{slots[int(p)].hour:2d}:{slots[int(p)].minute:02d}" for p in tick_pos]
    )
    ax.set_xlabel("Time")
    ax.set_title("Parking Flow By Time of Day and Month of Data\n{}".format(community))

In [ ]:
# One heatmap per community, stacked vertically. Every community gets its own
# row: previously nine plots were drawn on six axes, so the last three calls
# painted over the first three panels.
panels = [
    (dfpb, 'Pacific Beach'),
    (dfnp, 'North Park'),
    (dfgh, 'Golden Hill'),
    (dfsky, 'Skyline'),
    (dfmid, 'Midcity'),
    (dflj, 'La Jolla'),
    (dfunv, 'University'),
    (dfprk, 'Balboa Park'),
    (df, 'Downtown'),
]

# Keep the original 4 inches of height per panel.
fig, axes = plt.subplots(len(panels), 1, figsize=(12, 4 * len(panels)),
                         sharex=True, sharey=True)
axes = axes.ravel()

for panel_ax, (frame, label) in zip(axes, panels):
    month_time_plot(frame, panel_ax, label)

plt.tight_layout()

In [9]:
# Total normalised flow per (day of week, time-of-day slot) for the Downtown
# frame. Only delta_norm is aggregated so the non-numeric columns are never
# summed; the previously derived 'flow' column was never read and is dropped.
t = df.groupby([df.time.dt.dayofweek, df.time.dt.time])[['delta_norm']].sum()
by_day = t.unstack()

fig, ax = plt.subplots(figsize=(12, 4))
sns.heatmap(by_day, ax=ax, cmap='BrBG', center=0)

# pandas numbers days Monday=0 .. Sunday=6. Label each row from the day value
# it actually holds, so a missing weekday cannot shift the letters.
day_letters = "MTWRFSS"
tick_pos = [p for p in ax.get_yticks() if 0 <= int(p) < len(by_day.index)]
ax.set_yticks(tick_pos)
ax.set_yticklabels([day_letters[int(by_day.index[int(p)])] for p in tick_pos])
ax.set_xlabel("Time")
ax.set_ylabel("Day of Week")
# The axes are day of week and time of day, not month.
fig.suptitle("Parking Flow By Day of Week and Time of Day\nDowntown Community");



In [59]:
# Residential section of Soledad Mountain Road: plot only these two sensor
# locations, and title the plot after the street rather than 'Downtown'.
month_time_plot(prk_loc[prk_loc.locationuid.isin(['vdorekhgqzjgzv6tpw','g8lswmtn3pujgzv17ul'])].copy(),
                community='Soledad Mountain Road (residential section)')



In [60]:
# Regents Road near La Jolla Colony: plot only these two sensor locations,
# and title the plot after the street rather than 'Downtown'.
month_time_plot(prk_loc[prk_loc.locationuid.isin(['n1i27hhjxgjhwg95ru','7lio4wkir2ejhwg7bs2'])].copy(),
                community='Regents Road near La Jolla Colony')



In [ ]: