In [1]:
import seaborn as sns
import metapack as mp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display 
import geopandas as gpd
from shapely.geometry import Point
import re

import sys
!{sys.executable} -m pip --quiet install python-slugify

%matplotlib inline
sns.set_context('notebook')
mp.jupyter.init()

In [2]:
#pkg = mp.jupyter.open_package()
pkg = mp.jupyter.open_source_package()
pkg


Out[2]:

San Diego Smart Parking Meters

sandiego.gov-parking_meters-1 Last Update: 2019-02-27T22:13:12

Transactions from downtown parking meters

Documentation Links

Contacts

References


In [3]:
df15 = pkg.reference('parking_transactions_2015').dataframe().sample(100_000)
df16 = pkg.reference('parking_transactions_2016').dataframe().sample(100_000)
df17 = pkg.reference('parking_transactions_2017').dataframe().sample(100_000)
df18 = pkg.reference('parking_transactions_2018').dataframe().sample(100_000)

In [4]:
from functools import reduce
all_dfs = [df15, df16, df17, df18]
#columns = reduce(lambda a, b: a | set(b.columns), all_dfs, set())

columns = [
 'uuid',
 'pole_id',
 'trans_start',
 'meter_expire',
 'meter_type',
 'pay_method',
 'trans_amt',
 'smartcard_id'
]

In [5]:
for df in all_dfs:
    for col in columns:
        if col not in list(df.columns):
            df[col] = None
            
aligned_dfs = [ df[columns] for df in all_dfs ]
df = pd.concat(aligned_dfs)

In [6]:
df['trans_start'] = pd.to_datetime(df['trans_start'])
df['meter_expire'] = pd.to_datetime(df['meter_expire'])
df['n'] = 1
df.head()


Out[6]:
uuid pole_id trans_start meter_expire meter_type pay_method trans_amt smartcard_id n
2447788 None 5-3217 2015-04-24 10:46:32 2015-04-24 12:22:32 SS CASH 200 NaN 1
2782263 None 7-819 2015-05-06 17:02:32 2015-05-06 17:57:19 SS CASH 35 NaN 1
1289566 None WD-411 2015-03-12 16:39:09 2015-03-12 18:00:01 SS CASH 25 NaN 1
1816557 None 7-3810 2015-04-01 15:12:38 2015-04-01 15:58:02 SS CASH 25 NaN 1
2389446 None N-3717 2015-04-22 12:37:29 2015-04-22 13:37:29 SS CREDIT CARD 125 NaN 1

In [7]:
len(df)


Out[7]:
400000

In [8]:
#
# Link in communities and tracts
loc = pkg.reference('locations_source').dataframe()

t = loc[ (loc.longitude != 0) & (loc.longitude != -180 ) & (loc.longitude != -117)]
gloc = gpd.GeoDataFrame(t, geometry=
                        [Point(x,y) for x,y in zip(t.longitude, t.latitude)])

compkg = mp.open_package('http://library.metatab.org/sandiegodata.org-geography-2018.csv')
tract_boundaries = compkg.resource('tract_boundaries').geoframe()
tracts = compkg.resource('tract_communities').dataframe()
gloc.crs = tract_boundaries.crs

t = gpd.sjoin(gloc, tract_boundaries, how='left').drop(columns='index_right')
t = t.merge(tracts[['geoid', 'community_name']], on='geoid')
cols = ['pole', 'community_name', 'geoid', 'geometry', ]



# Re-merge to get the poles that had invalid locations

locations = loc.merge(t[cols], on='pole')



max_pat = re.compile(r'(\d+) (Min|Hour) Max')
def find_max(v):
    
    m = max_pat.search(v)
    
    if not m:
        return None
    
    val = int(m.group(1))
        
    if m.group(2) == 'Hour':
        val *= 60 
    
    return val

locations['max_time'] = locations.config_name.apply(find_max)  
    
cost_pat = re.compile(r'\$(\d+\.\d\d)')
def extract_cost(v):
    
    m = cost_pat.search(v)
    
    if not m:
        return None
    
    return float(m.group(1))

locations['per_minute_cost'] = (locations.config_name.apply(extract_cost)/60).round(4)


hours_pat = re.compile(r'(\d+)am-(\d)pm')
import json 

def extract_hours(v):
    
    m = hours_pat.search(v)
    
    if not m:
        return None
    
    hours =  [int(m.group(1)), int(m.group(2))+12]
    
    v = v.replace(m.group(0), '')
    
    m = hours_pat.search(v)
    
    if not m:
        return json.dumps(hours) # JSON notreally necessary; python is same

    return json.dumps(hours + [int(m.group(1)), int(m.group(2))+12])
    
locations['hours'] = locations.config_name.apply(extract_hours)

days_pat = re.compile(r'(M|Mon)-(Fri|Sat)')

def extract_days(v):
    
    if 'Daily' in v:
        return json.dumps(['Mon','Sun'])
    
    m = days_pat.search(v)
    
    if not m:
        return ''
    
    s,e = m.group(1), m.group(2)
    
    if s == 'M':
        s = 'Mon'
    
    days =  [s,e]
    
    v = v.replace(m.group(0), '')
    
    m = days_pat.search(v) 
    
    if not m:
        
        if 'Sat' in v:
            return json.dumps(days+ ['Sat','Sat'])
        else:
            return json.dumps(days) # JSON not really necessary; python is same

    return json.dumps(days + [m.group(1), m.group(2)])
    
locations['days'] = locations.config_name.apply(extract_days)


locations['area'] = locations.area.astype('category')
locations.sort_values('pole', inplace=True)
locations['pole_index'] = locations.reset_index().index + 1
locations.rename(columns={'pole':'pole_id'}, inplace=True)

# Map poles to meter types. 
meter_type_merge = df[['pole_id', 'meter_type']].sort_values('pole_id').drop_duplicates()


locations = locations.merge(meter_type_merge, on='pole_id', how='left')

final_cols = ['pole_id', 'pole_index', 'config_id', 'config_name', 'max_time', 'hours', 'days', 'per_minute_cost', 
              'meter_type', 'zone', 'area', 'sub_area',  'community_name', 'geoid','longitude', 'latitude', 'geometry']

locations = locations[final_cols]

locations.head()


Out[8]:
pole_id pole_index config_id config_name max_time hours days per_minute_cost meter_type zone area sub_area community_name geoid longitude latitude geometry
0 1-1004 1 9000 2 Hour Max $1.25 HR 8am-6pm Mon-Sat 120.0 [8, 18] ["Mon", "Sat"] 0.0208 SS Downtown Core - Columbia 1000 FIRST AVE Downtown 14000US00000006939 -117.163929 32.715904 POINT (-117.163929 32.71590399999999)
1 1-1006 2 9000 2 Hour Max $1.25 HR 8am-6pm Mon-Sat 120.0 [8, 18] ["Mon", "Sat"] 0.0208 SS Downtown Core - Columbia 1000 FIRST AVE Downtown 14000US00000006939 -117.163930 32.716037 POINT (-117.16393 32.716037)
2 1-1008 3 9000 2 Hour Max $1.25 HR 8am-6pm Mon-Sat 120.0 [8, 18] ["Mon", "Sat"] 0.0208 SS Downtown Core - Columbia 1000 FIRST AVE Downtown 14000US00000006939 -117.163931 32.716169 POINT (-117.163931 32.716169)
3 1-1020 4 9115 15 Min Max $1.25 HR 8am-6pm Mon-Sat 15.0 [8, 18] ["Mon", "Sat"] 0.0208 SS Downtown Core - Columbia 1000 FIRST AVE Downtown 14000US00000006939 -117.161278 32.717890 POINT (-117.161278 32.71789)
4 1-1310 5 12466 2 Hour Max $1.25 HR 8am-4pm Mon-Fri 8am-6pm Sat 120.0 [8, 16, 8, 18] ["Mon", "Fri", "Sat", "Sat"] 0.0208 SS Downtown Core - Columbia 1300 FIRST AVE Downtown 14000US00000006939 -117.163951 32.719024 POINT (-117.163951 32.719024)

In [ ]:


In [ ]: