In [1]:
import seaborn as sns
import metapack as mp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
import datetime
from cityiq import Config, CityIq
from cityiq.api import Asset, Location
from cityiq.scrape import EventScraper
%matplotlib inline
sns.set_context('notebook')
mp.jupyter.init()
In [2]:
#pkg = mp.jupyter.open_package()
pkg = mp.jupyter.open_source_package()
pkg
Out[2]:
In [15]:
tracts = pkg.reference('tract_boundaries').geoframe()
comm = pkg.reference('sd_community_boundaries').geoframe()
Out[15]:
In [4]:
tz = datetime.datetime.now(datetime.timezone.utc).astimezone().tzinfo
config = Config()
ciq = CityIq(config, start_time='20180901')
In [5]:
roads = pkg.reference('sdroads').geoframe()
sdroads = roads[(roads.lpsjur == 'SD') | (roads.rpsjur == 'SD')].copy()
# Convert to EPSG:2875 ( California zone 6, feet ) so we can make the buffer in feet
sdroads = sdroads.to_crs({'init':'epsg:2875'})
# Buffer the roads to be big enough to enclose the assets
sdroads['geometry'] = sdroads.geometry.apply(lambda g : g.buffer(60,cap_style=1, join_style=2))
sdroads = sdroads.to_crs( {'init':'epsg:4326'} )
# sdroads.to_csv('sdroads.csv')
In [28]:
assets = gpd.GeoDataFrame(ciq.asset_dataframe, geometry='geometry')
l1 = len(assets)
print(len(assets))
assets.crs = {'init':'epsg:4326'}
ac = assets.copy()
ac['geometry'] = ac.centroid.to_crs({'init':'epsg:2875'}).buffer(20).to_crs( {'init':'epsg:4326'} )
# Link to roads
t = gpd.sjoin(ac, sdroads, how='left').drop(columns=['index_right'])
# Link to communities
t = gpd.sjoin(t, comm[comm['type'] == 'sd_community'][['geometry','name']], how='left').drop(columns=['index_right'])
# Link to tracts
t = gpd.sjoin(t, tracts[['geometry','geoid']], how='left')
t = t[list(assets)[:-1] +
['name','geoid','roadsegid', 'speed', 'oneway','abloaddr', 'abhiaddr', 'rd30full'] +
['geometry'] ].drop_duplicates(subset='assetUid')\
.rename(columns={'name':'community_name','geoid':'tract_geoid'})
assets = t.drop(columns='geometry').join(assets['geometry'])
assert len(assets) == l1
# How many assets and locations did not get linked to roads?
len(assets[assets.speed.isnull()]), len(assets)
Out[28]:
In [36]:
locations = gpd.GeoDataFrame(ciq.locations_dataframe, geometry='geometry')
l1 = len(locations)
locations.crs = {'init':'epsg:4326'}
lc = locations.copy()
lc['geometry'] = locations.centroid.to_crs({'init':'epsg:2875'}).buffer(20).to_crs( {'init':'epsg:4326'} )
# Link to roads
t = gpd.sjoin(lc, sdroads, how='left').drop(columns=['index_right'])
# Link to communities
t = gpd.sjoin(t, comm[comm['type'] == 'sd_community'][['geometry','name']], how='left').drop(columns=['index_right'])
# Link to tracts
t = gpd.sjoin(t, tracts[['geometry','geoid']], how='left')
t = t[list(lc)[:-1] +
['name','geoid','roadsegid', 'speed', 'oneway','abloaddr', 'abhiaddr', 'rd30full'] +
['geometry'] ].drop_duplicates(subset='locationUid')\
.rename(columns={'name':'community_name','geoid':'tract_geoid'})
locations = t.drop(columns='geometry').join(locations['geometry'])
assert len(locations) == l1
In [38]:
locations.head()
Out[38]:
In [ ]: