In [1]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
import networkx as nx
import larch
from more_itertools import pairwise
larch.__version__
Out[1]:
Welcome to Exampville, the best simulated town in this here part of the internet!
Exampville is provided with Larch, and uses the kind of data that a transportation planner might have available when building a travel model. However, this data is entirely fictional.
This page walks through the creation of this synthetic data.
In [2]:
# Fix the global NumPy RNG seed so the synthetic data is reproducible.
# `seed` is also reused below to select the output file set via
# larch.exampville._files(seed).
seed=0
np.random.seed(seed)
In [3]:
import larch.exampville
We start with a shape file delineating some travel analysis zones. (Five bonus points if you can identify the real American city from which these tract shapes were derived.)
In [4]:
# Load the TAZ (travel analysis zone) boundaries packaged with Larch.
taz_shape = gpd.read_file("zip://"+larch.exampville.files.shapefile)

# Draw the zone map with no axis decoration.
ax = taz_shape.plot(edgecolor='w', figsize=(10,10), cmap='tab20')
ax.set_xticks([])
ax.set_yticks([])
ax.set_frame_on(False)
# Put Zone labels in the meatiest part of the zone shape...
for idx, row in taz_shape.iterrows():
    # Shrink the polygon by repeated negative buffering until it would
    # vanish, so the label point lands in the widest interior region.
    shrink = row['geometry']
    while True:
        try:
            shrink1 = shrink.buffer(-5)
            if shrink1.area <= 0:
                break
            else:
                shrink = shrink1
        except Exception:
            # Buffering can fail on degenerate geometry; keep the last
            # successful shrink.  (Was a bare `except:`, which would also
            # swallow KeyboardInterrupt/SystemExit.)
            break
    ax.annotate(
        # Fix: the first argument of Axes.annotate was renamed from `s`
        # to `text` (deprecated in matplotlib 3.3, later removed).
        text=row['TAZ'],
        xy=tuple(shrink.representative_point().coords)[0],
        horizontalalignment='center',
        verticalalignment='center',
    )
In [5]:
# Hand-picked TAZ groupings.  NOTE(review): these two income lists are not
# referenced again in this notebook as shown — confirm before removing.
higher_income_tazs = [2,33,34,5]
lower_income_tazs = [40,29,30,17,16]
In [6]:
# Number of zones in the shapefile (one row per TAZ).
nZones = len(taz_shape)
In [7]:
# Area-type groupings used later for the TAZ_AREA_TYPE skim lookup.
zones_cbd = [36]
zones_urb = [4,28,25,3,35,6]
In [8]:
# Total number of synthetic households to generate (Group 1 + Group 2).
n_hh = 5000
In [9]:
### Group 1: In-town density
# Draw 3500 households from a 5-D multivariate normal over
# (X, Y, log-income, log-vehicles, log-household-size).
n_hh_1 = 3500
mean = [650, 400, 11, 0.35, 0.52]
# Named off-diagonal covariance terms keep the matrix below readable:
# income/vehicles/size each covary with location (x, y) and each other.
i_y = 80
i_x = 100
v_y = -22
v_x = 10
s_x = 10
s_y = -22
s_v = 0.21
s_i = 0.1
cov = [
[44000, 25000, i_x, v_x, s_x],
[25000, 44000, i_y, v_y, s_y],
[ i_x, i_y, 1.0, 0.4, s_i],
[ v_x, v_y, 0.4, 0.35, s_v],
[ s_x, s_y, s_i, s_v, 0.35],
]
x, y, income, veh, hhsz = np.random.multivariate_normal(mean, cov, n_hh_1).T
# Assemble into a DataFrame
# The log-scale draws are exponentiated into positive integer attributes.
hh_locations = pd.DataFrame.from_dict({
'X':np.round(x,2),
'Y':np.round(y,2),
'INCOME':np.exp(income).astype(int),
'N_VEHICLES':np.exp(veh).astype(int),
'HHSIZE':np.exp(hhsz).astype(int)+1,
})
# Convert to a GeoDataFrame
hh_locations = gpd.GeoDataFrame(
hh_locations,
geometry=gpd.points_from_xy(hh_locations.X, hh_locations.Y),
crs={},
)
# Attach HOMETAZ, and drop points outside the region.
# NOTE(review): `op=` is deprecated in newer geopandas in favor of `predicate=`.
hh_locations = gpd.sjoin(hh_locations, taz_shape[['TAZ','geometry']], how='inner', op='within')
In [10]:
# `.statistics()` is a Larch-provided pandas accessor producing a summary table.
hh_locations.HHSIZE.statistics(discrete=True)
Out[10]:
In [11]:
# Cross-tab of household size vs. vehicle count (cell values are counts).
pd.pivot_table(
hh_locations,
index='HHSIZE',
columns='N_VEHICLES',
aggfunc={'X':'size'},
).fillna(0).astype(int)
Out[11]:
In [12]:
# Map household income (color scale capped at 150000).
ax = taz_shape.plot(edgecolor='k', figsize=(10,10), color='w')
ax.set_xticks([])
ax.set_yticks([])
ax.set_frame_on(False)
hh_locations.plot(ax=ax, column='INCOME', alpha=1, cmap='Reds', vmax=150000);
In [13]:
# Map vehicle ownership (color scale capped at 3).
ax = taz_shape.plot(edgecolor='k', figsize=(10,10), color='w')
ax.set_xticks([])
ax.set_yticks([])
ax.set_frame_on(False)
hh_locations.plot(ax=ax, column='N_VEHICLES', alpha=1, cmap='Reds', vmax=3);
In [14]:
hh_locations.INCOME.statistics()
Out[14]:
In [15]:
hh_locations.head()
Out[15]:
In [16]:
#### Group 2: Regional base population
n_hh_2 = n_hh - len(hh_locations) # Enough to get back to the desired total
# Unlike Group 1, INCOME here is drawn in natural units (mean 20000),
# not on the log scale.
mean = [650, 400, 20000, 0.45, 0.52]
i_y = 0
i_x = 0
v_y = 0
v_x = 0
s_x = 0
s_y = 0
v_i = 846
s_v = 0.28
s_i = 600
cov = [
[44000, 0, i_x, v_x, s_x],
[ 0, 44000, i_y, v_y, s_y],
[ i_x, i_y,9000000, v_i, s_i],
[ v_x, v_y, v_i, 0.25, s_v],
[ s_x, s_y, s_i, s_v, 0.35],
]
In [17]:
x2, y2, i2, v2, s2 = np.random.multivariate_normal(mean, cov, n_hh_2).T
# The drawn x2/y2 are immediately overwritten with uniform locations over the
# full 1000 x 800 region; the correlated draws are kept only for the
# income / vehicle / size columns.
x2 = np.random.random(n_hh_2)*1000
y2 = np.random.random(n_hh_2)*800
# i2 = np.exp(np.random.random(n_hh_2)*3+8).astype(int)
# v2 = np.exp(np.random.random(n_hh_2)*1.3)
# s2 = v2 + np.exp(np.random.random(n_hh_2)*0.3)
# Assemble into a DataFrame
hh_locations2 = pd.DataFrame.from_dict({
'X':np.round(x2,2),
'Y':np.round(y2,2),
'INCOME':(i2),
'N_VEHICLES':np.exp(v2).astype(int),
'HHSIZE':np.exp(s2).astype(int)+1,
})
# Convert to a GeoDataFrame
hh_locations2 = gpd.GeoDataFrame(
hh_locations2,
geometry=gpd.points_from_xy(hh_locations2.X, hh_locations2.Y),
crs={},
)
# Attach HOMETAZ
# Points falling outside every zone are dropped by the inner join.
hh_locations2 = gpd.sjoin(hh_locations2, taz_shape[['TAZ','geometry']], how='inner', op='within')
In [18]:
hh_locations2.head()
Out[18]:
In [19]:
hh_locations2.INCOME.statistics()
Out[19]:
In [20]:
hh_locations2.N_VEHICLES.statistics(discrete=True)
Out[20]:
In [21]:
hh_locations2.corr()
Out[21]:
In [22]:
#### Merge Groups
# Stack the two household groups into a single frame `HH`.
HH = pd.concat([hh_locations, hh_locations2], sort=False)
# Clean up
HH = HH.reset_index(drop=True)
HH.rename({'TAZ':'HOMETAZ'}, axis=1, inplace=True)
# `index_right` is a leftover column from the spatial joins above.
HH.drop('index_right', axis=1, inplace=True)
# Add HH size and ID
# HH['HHSIZE'] = (np.floor(
# np.random.binomial(5, 0.3, [n_hh, ])
# + 1
# + np.random.random([n_hh, ])
# )).astype(int)
#np.floor(np.random.exponential(0.8, [n_hh, ]) + 1 + np.random.random([n_hh, ])).astype(int)
# Household IDs start at 50000 so they do not collide with person IDs (60000+).
HH['HHID'] = HH.index + 50000
In [23]:
HH.head()
Out[23]:
In [24]:
# HH.index = HH['HHID']
HH['HHSIZE'].statistics(discrete=True)
Out[24]:
In [25]:
HH['INCOME'].statistics()
Out[25]:
In [26]:
HH.info()
In [27]:
# Map all synthesized household locations over the zone boundaries.
ax = taz_shape.plot(edgecolor='k', figsize=(10,10), color='w')
ax.set_xticks([])
ax.set_yticks([])
ax.set_frame_on(False)
HH.plot(ax=ax, color='red', alpha=0.25);
In [28]:
# Expand households into one record per person.  The loop fills preallocated
# arrays in household order, so the random draws are order-dependent — do not
# reorder without changing the resulting synthetic data.
HHsize = HH['HHSIZE']
n_PER = np.sum(HHsize)
PER = {}
PERidx = PER['idx'] = np.arange(n_PER, dtype=np.int64)
PERid = PER['PERSONID'] = np.asarray([60000 + i for i in PER['idx']])
PERhhid = PER['HHID'] = np.zeros(n_PER, dtype=np.int64)
PERhhidx = PER['HHIDX'] = np.zeros(n_PER, dtype=np.int64)
PERage = PER['AGE'] = np.zeros(n_PER, dtype=np.int64)
n2 = 0
# First person in each household is an adult (age 18-84); any additional
# members draw age uniformly from 5-84.
for n1 in range(n_hh):
PER['HHID'][n2:(n2 + HHsize[n1])] = HH['HHID'][n1]
PER['HHIDX'][n2:(n2 + HHsize[n1])] = HH.index[n1]
PER['AGE'][n2] = int(np.random.random() * 67 + 18)
if HHsize[n1]>1:
nphh = HHsize[n1]-1
PER['AGE'][n2+1:n2+1+nphh] = (np.random.random(nphh) * 80 + 5).astype(np.int64)
n2 += HHsize[n1]
# 85% of persons aged 17-69 are flagged as workers.
PERworks = PER['WORKS'] = ((np.random.random(n_PER) > 0.15) & (PERage > 16) & (PERage < 70)).astype(np.int64)
PER = pd.DataFrame.from_dict(PER)
PER.drop('idx', axis=1, inplace=True)
In [29]:
PER['AGE'].statistics()
Out[29]:
In [30]:
HH.head()
Out[30]:
In [31]:
# Bring household vehicle count and income onto each person record;
# these drive the tour-generation rates below.
n_veh = HH[['N_VEHICLES', 'INCOME']]
n_veh.index = HH.HHID
PER = pd.merge(PER, n_veh, left_on='HHID', right_index=True)
In [32]:
vcounts = PER.N_VEHICLES.value_counts().sort_index()
vcounts
Out[32]:
In [33]:
PER.N_VEHICLES.statistics(discrete=True)
Out[33]:
In [34]:
# Number of work tours and other tours per person.  Base rate vectors
# (probability of 0/1/2/3 tours) vary with household vehicle count; each
# income band then shifts probability mass between the 0/1 and 2/3 bins.
PERnworktours = np.zeros_like(PERage)
PERnothertours = np.zeros_like(PERage)
i = 0
for nv,nt in vcounts.items():
if nv==0:
work_rates, other_rates = [0.25, 0.70, 0.04, 0.01], [0.74, 0.2, 0.05, 0.01]
elif nv==1:
work_rates, other_rates = [0.13, 0.74, 0.09, 0.04], [0.2, 0.5, 0.2, 0.1]
elif nv==2:
work_rates, other_rates = [0.10, 0.70, 0.17, 0.03], [0.1, 0.5, 0.3, 0.1]
elif nv==3:
work_rates, other_rates = [0.07, 0.61, 0.26, 0.06], [0.1, 0.4, 0.3, 0.2]
else:
work_rates, other_rates = [0.05, 0.53, 0.33, 0.09], [0.1, 0.3, 0.4, 0.2]
work_rates = np.array(work_rates)
other_rates = np.array(other_rates)
# Iterate income bands from highest to lowest; `pairwise` yields the
# (upper, lower) edges of each half-open band.
for income_under, income_over in pairwise([np.inf, 150_000, 120_000, 90_000, 60_000, 30_000, -np.inf]):
PERv = (PER.N_VEHICLES == nv) & (PER.INCOME>=income_over) & (PER.INCOME<income_under)
if income_over == 150_000:
work_rates_ = work_rates + np.array([-0.04, -0.10, 0.10, 0.04])
other_rates_ = other_rates + np.array([-0.07, -0.13, 0.13, 0.07])
elif income_over == 120_000:
work_rates_ = work_rates + np.array([-0.03, -0.08, 0.08, 0.03])
other_rates_ = other_rates + np.array([-0.05, -0.1, 0.13, 0.02])
elif income_over == 90_000:
work_rates_ = work_rates + np.array([-0.02, -0.06, 0.07, 0.01])
other_rates_ = other_rates + np.array([-0.03, -0.07, 0.08, 0.02])
elif income_over == 60_000:
work_rates_ = work_rates + np.array([-0.01, -0.04, 0.05, 0.0])
other_rates_ = other_rates + np.array([-0.02, -0.03, 0.04, 0.01])
elif income_over == 30_000:
work_rates_ = work_rates
other_rates_ = other_rates
else:
work_rates_ = work_rates + np.array([0.1, 0.05, -0.05, -0.1])
other_rates_ = other_rates + np.array([0.2, 0.05, -0.05, -0.2])
j = sum(PERv)
# Clip tiny/negative probabilities, then renormalize so each rate
# vector sums to 1 (required by np.random.choice's `p`).
work_rates_[work_rates_<0.0001] = 0.0001
work_rates_ /= work_rates_.sum()
other_rates_[other_rates_<0.0001] = 0.0001
other_rates_ /= other_rates_.sum()
# Work tours are zeroed for non-workers via the WORKS multiplier.
PERnworktours[PERv] = np.random.choice([0, 1, 2, 3], size=[j, ], replace=True, p=work_rates_).astype(np.int64) * PER['WORKS'][PERv]
PERnothertours[PERv] = np.random.choice([0, 1, 2, 3], size=[j, ], replace=True, p=other_rates_).astype(np.int64)
In [35]:
# Leftover debug cell: prints the income-band boundaries; harmless.
for income_under, income_over in pairwise([np.inf, 150_000, 120_000, 90_000, 60_000, 0]):
print(income_under, income_over)
In [36]:
## Add counts of tours
# PER['N_WORK_TOURS'] = PERnworktours = np.random.choice([0, 1, 2, 3], size=[n_PER, ], replace=True, p=[0.1, 0.8, 0.07, 0.03]).astype(
# np.int64) * PER['WORKS']
# PER['N_OTHER_TOURS'] = PERnothertours = np.random.choice([0, 1, 2, 3], size=[n_PER, ], replace=True, p=[0.2, 0.5, 0.2, 0.1]).astype(np.int64)
PER['N_WORK_TOURS'] = PERnworktours
PER['N_OTHER_TOURS'] = PERnothertours
PER['N_TOURS'] = PERntours = PERnworktours + PERnothertours
In [37]:
PER.INCOME.statistics()
Out[37]:
In [38]:
# PER.sort_index(inplace=True)
In [39]:
PER.head()
Out[39]:
In [40]:
# Total jobs to synthesize equals the number of workers, so regional
# employment balances with the labor force.
total_employment = PER['WORKS'].sum()
total_employment
Out[40]:
In [41]:
#### Group 1 Retail Locus
# 10% of jobs: a tight retail cluster centered at (370, 560).
n_retail_jobs_1 = int(total_employment * 0.1)
mean = [370, 560,]
cov = [
[3000, 0],
[0, 3000],
]
x, y = np.random.multivariate_normal(mean, cov, n_retail_jobs_1).T
retail_locations_1 = pd.DataFrame.from_dict({
'X':x,
'Y':y,
})
retail_locations_1 = gpd.GeoDataFrame(
retail_locations_1,
geometry=gpd.points_from_xy(retail_locations_1.X, retail_locations_1.Y),
crs={},
)
# Attach TAZ; points outside all zones are dropped by the inner join.
retail_locations_1 = gpd.sjoin(retail_locations_1, taz_shape[['TAZ','geometry']], how='inner', op='within')
ax =taz_shape.plot(edgecolor='k', color='w')
retail_locations_1.plot(ax=ax, color='red', alpha=0.1);
In [42]:
#### Group 2 Retail Background
# Another 10% of jobs: diffuse retail centered at (600, 250).
n_retail_jobs_2 = int(total_employment * 0.1)
mean = [600, 250,]
cov = [
[22000, 0],
[0, 22000],
]
x, y = np.random.multivariate_normal(mean, cov, n_retail_jobs_2).T
retail_locations_2 = pd.DataFrame.from_dict({
'X':x,
'Y':y,
})
retail_locations_2 = gpd.GeoDataFrame(
retail_locations_2,
geometry=gpd.points_from_xy(retail_locations_2.X, retail_locations_2.Y),
crs={},
)
retail_locations_2 = gpd.sjoin(retail_locations_2, taz_shape[['TAZ','geometry']], how='inner', op='within')
ax =taz_shape.plot(edgecolor='k', color='w')
retail_locations_2.plot(ax=ax, color='red', alpha=0.1)
Out[42]:
In [43]:
#### Group 3 Downtown Locus
# Half of all jobs: non-retail cluster centered downtown at (300, 250).
n_other_jobs_1 = int(total_employment * 0.5)
mean = [300, 250,]
cov = [
[22000, 0],
[0, 22000],
]
x, y = np.random.multivariate_normal(mean, cov, n_other_jobs_1).T
other_locations_1 = pd.DataFrame.from_dict({
'X':x,
'Y':y,
})
other_locations_1 = gpd.GeoDataFrame(
other_locations_1,
geometry=gpd.points_from_xy(other_locations_1.X, other_locations_1.Y),
crs={},
)
other_locations_1 = gpd.sjoin(other_locations_1, taz_shape[['TAZ','geometry']], how='inner', op='within')
ax =taz_shape.plot(edgecolor='k', color='w')
other_locations_1.plot(ax=ax, color='red', alpha=0.1)
Out[43]:
In [44]:
#### Group 4 Background
# Remaining jobs: uniform over the 1000 x 800 region so that the total
# (after dropping points outside the zones) approaches total_employment.
n_other_jobs_2 = total_employment - len(other_locations_1) - len(retail_locations_2) - len(retail_locations_1)
x2 = np.random.random(n_other_jobs_2)*1000
y2 = np.random.random(n_other_jobs_2)*800
other_locations_2 = pd.DataFrame.from_dict({
'X':x2,
'Y':y2,
})
other_locations_2 = gpd.GeoDataFrame(
other_locations_2,
geometry=gpd.points_from_xy(other_locations_2.X, other_locations_2.Y),
crs={},
)
other_locations_2 = gpd.sjoin(other_locations_2, taz_shape[['TAZ','geometry']], how='inner', op='within')
ax =taz_shape.plot(edgecolor='k', color='w')
other_locations_2.plot(ax=ax, color='red', alpha=0.1);
In [45]:
## Merge all employment
# Tag each job group with its type, then stack into one frame.
retail_locations_1['JOBTYPE'] = 'retail'
retail_locations_2['JOBTYPE'] = 'retail'
other_locations_1['JOBTYPE'] = 'nonretail'
other_locations_2['JOBTYPE'] = 'nonretail'
job_location = pd.concat([retail_locations_1,retail_locations_2,other_locations_1,other_locations_2])
job_location = job_location.reset_index(drop=True)
job_location['JOBTYPE'] = job_location['JOBTYPE'].astype('category')
ax =taz_shape.plot(edgecolor='k', color='w')
job_location.plot(ax=ax, color='red', alpha=0.1);
In [46]:
# Count jobs by zone and type, pivoting JOBTYPE into columns.
taz_employment = job_location.groupby(['TAZ','JOBTYPE']).size().unstack().fillna(0).astype(int)
taz_employment.rename({'retail':'RETAIL_EMP', 'nonretail':'NONRETAIL_EMP'}, axis=1, inplace=True)
In [47]:
taz_employment['TOTAL_EMP'] = taz_employment['NONRETAIL_EMP'] + taz_employment['RETAIL_EMP']
In [48]:
# Drop the residual 'JOBTYPE' label left on the columns axis by unstack().
taz_employment.columns.name = None
In [49]:
taz_employment.head()
Out[49]:
In [50]:
# Save
# Write the zonal employment table to the seed-specific exampville file set.
taz_employment.to_csv(larch.exampville._files(seed).employment)
In [ ]:
In [51]:
from itertools import tee
def pairwise(iterable):
    """Yield consecutive overlapping pairs: s -> (s0,s1), (s1,s2), (s2,s3), ..."""
    left, right = tee(iterable)
    next(right, None)  # advance the second iterator one step
    return zip(left, right)
In [52]:
# Ordered zone sequences traversed by the highway and the transit line.
highway_route = [12,2,10,23,24,26,27,39,11]
transit_line = [39,6,36,25,29,17,31,14,34]
In [53]:
# Travel speeds expressed as minutes per mile (larger = slower).
speed_road = 3 # minutes per mile
speed_highway = 1 # minutes per mile
speed_train = 1.2 # minutes per mile
speed_walk = 20 # minutes per mile
speed_bike = 5 # minutes per mile
In [54]:
# Build a directed zone-adjacency graph: zones whose shapes touch get an
# edge, with attributes for distance and per-mode travel times.
g = nx.DiGraph()
for index, zone in taz_shape.iterrows():
# get neighboring (not-disjoint) zones and their centroids
neighbors = taz_shape[~taz_shape.geometry.disjoint(zone.geometry)].TAZ.tolist()
neighborc = taz_shape[~taz_shape.geometry.disjoint(zone.geometry)].geometry.centroid.tolist()
# add names of neighbors as NEIGHBORS value
for name, cent in zip(neighbors,neighborc):
if zone.TAZ != name:
otaz, dtaz = int(zone.TAZ), int(name)
# Centroid-to-centroid distance, scaled from map units to miles.
distance = (zone.geometry.centroid.distance(cent)) / 100
if otaz in highway_route and dtaz in highway_route:
cartime = distance * speed_highway
else:
cartime = distance * speed_road
if otaz in [25,36,3]:
cartime += 3 # congestion
if dtaz in [25,36,3]:
cartime += 5 # congestion
# 999999 is a sentinel meaning "component not applicable on this edge":
# transit links carry no out-of-vehicle time, walk links no in-vehicle time.
if otaz in transit_line and dtaz in transit_line:
transit_ivtt = distance * speed_train
transit_ovtt = 999999
transit_time = transit_ivtt
else:
transit_ivtt = 999999
transit_ovtt = distance * speed_walk
transit_time = transit_ovtt
g.add_edge(
otaz, dtaz,
distance=distance, cartime=cartime,
transit_ovtt=transit_ovtt,
transit_ivtt=transit_ivtt,
transit_time=transit_time,
)
In [55]:
# Reindex by TAZ so taz_shape.loc[otaz, ...] lookups below work by zone number.
taz_shape.index = taz_shape.TAZ
In [56]:
centroids = taz_shape.centroid
In [57]:
## Highway Map
# Overlay the highway route edges on the zone map, positioning each node
# at its zone centroid.
ax = taz_shape.plot(edgecolor='w')
nx.draw_networkx_edges(
    g,
    # Fix: Series.iteritems() was removed in pandas 2.0; .items() is the
    # drop-in equivalent and also works on older pandas.
    pos={i: p.coords[0] for i, p in centroids.items()},
    ax=ax,
    arrows=False,
    edgelist=list(pairwise(highway_route)),
);
In [58]:
## Transit Map
# Overlay the transit line edges on the zone map, positioning each node
# at its zone centroid.
ax = taz_shape.plot(edgecolor='w')
nx.draw_networkx_edges(
    g,
    # Fix: Series.iteritems() was removed in pandas 2.0; .items() is the
    # drop-in equivalent and also works on older pandas.
    pos={i: p.coords[0] for i, p in centroids.items()},
    ax=ax,
    arrows=False,
    edgelist=list(pairwise(transit_line)),
);
In [59]:
# Skim walk times
# All-pairs shortest-path distances over the zone graph.  Zone numbers are
# 1-based; matrix indices are 0-based, hence the -1 offsets.
WALKDIST = np.zeros([40,40])
for otaz in range(1,41):
shortpaths = nx.shortest_path_length(g, source=otaz, weight='distance')
for dtaz,t in shortpaths.items():
WALKDIST[otaz-1,dtaz-1] = t
# Intrazonal
# Approximate the intrazonal distance from the zone's area.
WALKDIST[otaz-1,otaz-1] = np.sqrt(taz_shape.loc[otaz,'geometry'].area)/100
print(WALKDIST)
In [ ]:
In [60]:
# Skim car times
# Walk each shortest-cartime path and accumulate both time and distance,
# so CARDIST reflects the route actually taken, not the shortest distance.
CARTIME = np.zeros([40,40])
CARDIST = np.zeros([40,40])
for otaz in range(1,41):
shortpaths = nx.shortest_path(g, source=otaz, weight='cartime')
for dtaz,pth in shortpaths.items():
cartime, cardist = 0,0
for i,j in pairwise(pth):
cartime += g.edges[i,j]['cartime']
cardist += g.edges[i,j]['distance']
CARTIME[otaz-1,dtaz-1] = cartime
CARDIST[otaz-1,dtaz-1] = cardist
# Intrazonal
intrazonal_dist = np.sqrt(taz_shape.loc[otaz,'geometry'].area)/100
CARTIME[otaz-1,otaz-1] = intrazonal_dist * speed_road
CARDIST[otaz-1,otaz-1] = intrazonal_dist
if otaz in [25,36,3]:
CARTIME[otaz-1,otaz-1] += 5 # congestion
print(CARTIME)
In [61]:
## Skim Transit times
# Split each shortest transit path into in-vehicle and out-of-vehicle time.
# (The duplicate local redefinition of `pairwise` that was here has been
# removed — `pairwise` is already imported/defined earlier in the notebook.)
TRANSIT_IVTT = np.zeros([40,40])
TRANSIT_OVTT = np.zeros([40,40])
for otaz in range(1,41):
    shortpaths = nx.shortest_path(g, source=otaz, weight='transit_time')
    for dtaz,pth in shortpaths.items():
        ivtt, ovtt = 0,0
        for i,j in pairwise(pth):
            # 999999 is the sentinel for "component not present on this
            # edge"; accumulate only the applicable component.
            if g.edges[i,j]['transit_ivtt'] < 999999:
                ivtt += g.edges[i,j]['transit_ivtt']
            else:
                ovtt += g.edges[i,j]['transit_ovtt']
        if ovtt == 0:
            # Pure-transit paths still need access/egress walk time,
            # approximated from the origin and destination zone areas.
            ovtt = np.sqrt(taz_shape.loc[otaz,'geometry'].area)/150 + np.sqrt(taz_shape.loc[dtaz,'geometry'].area)/150
        TRANSIT_IVTT[otaz-1,dtaz-1] = ivtt
        TRANSIT_OVTT[otaz-1,dtaz-1] = ovtt
In [62]:
# Spot-check corners of the transit skims.
print(TRANSIT_IVTT[:5,:5])
In [63]:
print(TRANSIT_IVTT[5:10,5:10])
In [64]:
print(TRANSIT_OVTT[:5,:5])
In [65]:
# Inspect one origin-destination path and its out-of-vehicle time.
nx.shortest_path(g, source=30, weight='transit_time')[17]
Out[65]:
In [66]:
TRANSIT_OVTT[30-1,17-1]
Out[66]:
In [67]:
print(TRANSIT_OVTT[5:10,5:10])
In [68]:
# Remove any previous skim file so the OMX write below starts clean.
# Fix for consistency: check and remove the same seed-specific path that the
# next cell opens for writing (previously this used `larch.exampville.files.skims`,
# which may differ from the seed-specific file being created).
if os.path.exists(larch.exampville._files(seed).skims):
    os.remove(larch.exampville._files(seed).skims)
In [69]:
## Assemble Skims into an OMX File
skims_omx = larch.OMX(larch.exampville._files(seed).skims, mode='w')
In [70]:
# Derived matrices: flat $2.50 transit fare wherever transit IVTT is nonzero,
# walk/bike times scaled from walk distance, auto cost at $0.35 per mile.
skims_omx.add_matrix('TRANSIT_IVTT', TRANSIT_IVTT)
skims_omx.add_matrix('TRANSIT_OVTT', TRANSIT_OVTT)
skims_omx.add_matrix('TRANSIT_FARE', (TRANSIT_IVTT>0)*2.50)
skims_omx.add_matrix('WALK_DIST', WALKDIST)
skims_omx.add_matrix('WALK_TIME', WALKDIST * speed_walk)
skims_omx.add_matrix('BIKE_TIME', WALKDIST * speed_bike)
skims_omx.add_matrix('AUTO_TIME', CARTIME)
skims_omx.add_matrix('AUTO_COST', CARDIST * 0.35)
skims_omx.add_matrix('AUTO_DIST', CARDIST);
In [71]:
# 1-based zone IDs as the OMX lookup.
taz_ids = np.arange(nZones)+1
skims_omx.add_lookup('TAZ_ID', taz_ids);
In [72]:
# Area type per zone: default suburban, overridden for CBD and urban zones.
taz_ids = np.arange(nZones)+1
taz_area_types = np.full(40, 'SUB')
taz_area_types[np.in1d(taz_ids, zones_cbd)] = 'CBD'
taz_area_types[np.in1d(taz_ids, zones_urb)] = 'URB'
In [73]:
skims_omx.add_lookup('TAZ_AREA_TYPE', taz_area_types);
In [74]:
# Close the writer, then reopen read-only for the simulation below.
skims_omx.close()
In [75]:
skims_omx = larch.OMX(larch.exampville._files(seed).skims, mode='r')
In [76]:
# Tour Modes
# Mode codes used in the tour table (1-based).
DA = 1
SR = 2
Walk = 3
Bike = 4
Transit = 5
In [77]:
## Tours
# One record per tour.  For each person, work tours precede other tours.
n_TOUR = PERntours.sum()
TOURid = np.arange(n_TOUR, dtype=np.int64)
TOURper = np.zeros(n_TOUR, dtype=np.int64)
TOURperidx = np.zeros(n_TOUR, dtype=np.int64)
TOURhh = np.zeros(n_TOUR, dtype=np.int64)
TOURhhidx = np.zeros(n_TOUR, dtype=np.int64)
TOURdtaz = np.zeros(n_TOUR, dtype=np.int64)
TOURstops = np.zeros(n_TOUR, dtype=np.int64)
TOURmode = np.zeros(n_TOUR, dtype=np.int64)
TOURpurpose = np.zeros(n_TOUR, dtype=np.int64)
# Work tours, then other tours
n2 = 0
for n1 in range(n_PER):
TOURper[n2:(n2 + PERntours[n1])] = PERid[n1]
TOURperidx[n2:(n2 + PERntours[n1])] = PERidx[n1]
TOURhh[n2:(n2 + PERntours[n1])] = PERhhid[n1]
TOURhhidx[n2:(n2 + PERntours[n1])] = PERhhidx[n1]
# Purpose codes: 1 = work, 2 = other.
TOURpurpose[n2:(n2 + PERnworktours[n1])] = 1
TOURpurpose[(n2 + PERnworktours[n1]):(n2 + PERntours[n1])] = 2
# Intermediate-stop counts: work tours rarely have stops; other tours
# draw from a broader 0-5 stop distribution.
TOURstops[n2:(n2 + PERnworktours[n1])] = np.random.choice(
[0, 1, 2, 3],
size=[PERnworktours[n1], ],
replace=True,
p=[0.8, 0.1, 0.05, 0.05],
).astype(np.int64)
TOURstops[(n2 + PERnworktours[n1]):(n2 + PERntours[n1])] = np.random.choice(
[0, 1, 2, 3, 4, 5],
size=[PERntours[n1]-PERnworktours[n1], ],
replace=True,
p=[0.4, 0.15, 0.15, 0.15, 0.1, 0.05],
).astype(np.int64)
n2 += PERntours[n1]
In [78]:
PERnworktours[:5], PERworks[:5], PERid[:5]
Out[78]:
In [79]:
#### Utility by mode to various destinations
# Mode indices (0-based, used for the Util array's last axis) and the
# hand-set "true" parameters of the simulated nested logit model.
nameModes = ['DA', 'SR', 'Walk', 'Bike', 'Transit']
mDA = 0
mSR = 1
mWA = 2
mBI = 3
mTR = 4
nModes = len(nameModes)
nModeNests = 3
paramCOST = -0.312
paramTIME = -0.123
paramNMTIME = -0.246
paramDIST = -0.00357
paramLNDIST = -0.00642
paramMUcar = 0.5
paramMUnon = 0.75
paramMUmot = 0.8
paramMUtop = 1.0
In [80]:
zone_retail = taz_employment.RETAIL_EMP
zone_nonretail = taz_employment.NONRETAIL_EMP
In [81]:
# Systematic utility for each (tour, destination zone, mode), built from
# the skims, household attributes, and the parameters above.
Util = np.zeros([n_TOUR, nZones, nModes])
for n in range(n_TOUR):
# Mode
# Origin zone index (0-based) is the tour-maker's home zone.
otazi = HH.HOMETAZ[TOURhhidx[n]] - 1
Util[n, :, mDA] += (
+ skims_omx.AUTO_TIME[otazi, :] * paramTIME
+ skims_omx.AUTO_COST[otazi, :] * paramCOST
)
# Higher-income households favor driving alone over transit.
if HH.INCOME[TOURhhidx[n]] >= 75000:
Util[n, :, mDA] += 1.0
Util[n, :, mTR] -= 0.5
# Shared ride: same time, half the cost, minus a constant.
Util[n, :, mSR] += (
+ skims_omx.AUTO_TIME[otazi, :] * paramTIME
- 1.0
+ skims_omx.AUTO_COST[otazi, :] * paramCOST * 0.5
)
Util[n, :, mWA] += 3.0 + skims_omx.WALK_TIME[otazi, :] * paramNMTIME
Util[n, :, mBI] += -2.25 + skims_omx.BIKE_TIME[otazi, :] * paramNMTIME
# Transit: out-of-vehicle time weighted 2.2x relative to in-vehicle time.
Util[n, :, mTR] += (
+ 1.5
+ skims_omx.TRANSIT_IVTT[otazi, :] * paramTIME
+ skims_omx.TRANSIT_OVTT[otazi, :] * paramTIME * 2.2
+ skims_omx.TRANSIT_FARE[otazi, :] * paramCOST
)
# Tours with more stops lean toward driving alone.
Util[n, :, mDA] += 0.33 * TOURstops[n] - 0.1
# Destination
# Distance deterrence plus a size term on zonal employment; lower-income
# households weight retail employment more heavily, others non-retail.
Util[n, :, :] += skims_omx.AUTO_DIST[:][otazi, :, None] * paramDIST + np.log1p(skims_omx.AUTO_DIST[:][otazi, :, None]) * paramLNDIST
if HH.INCOME[TOURhhidx[n]] <= 50000:
Util[n, :, :] += 0.75 * np.log(zone_retail * 2.71828 + zone_nonretail)[:, None]
else:
Util[n, :, :] += 0.75 * np.log(zone_retail + zone_nonretail * 2.71828)[:, None]
# flog('Util[n,:,:] ...')
# flog('{}',Util[n,:,:])
# Unavails
# Unavailable alternatives get -inf utility: no driving under 16, no
# transit where there is no fare (unreachable), no walk/bike over 60 min.
if PERage[TOURperidx[n]] < 16:
Util[n, :, mDA] = -np.inf
Util[n, skims_omx.TRANSIT_FARE[otazi, :] <= 0, mTR] = -np.inf
Util[n, skims_omx.WALK_TIME[otazi, :] >= 60, mWA] = -np.inf
Util[n, skims_omx.BIKE_TIME[otazi, :] >= 60, mBI] = -np.inf
In [82]:
# Local aliases for brevity in the probability expressions below.
from numpy import log, exp
In [83]:
# Nested logit probabilities.  Nest structure: {DA, SR} form the car nest,
# {car nest, Transit} form the motorized nest, {Walk, Bike} the
# nonmotorized nest, and the two top nests combine at the root (mu=1).
# CPr_* hold conditional probabilities within each nest; NLS_*/MLS/DLS
# are the corresponding logsums (MLS = mode-choice, DLS = destination-choice).
CPr_car = np.zeros([n_TOUR, nZones, 2]) # [DA,SR]
CPr_non = np.zeros([n_TOUR, nZones, 2]) # [WA,BI]
CPr_mot = np.zeros([n_TOUR, nZones, 2]) # [TR,Car]
CPr_top = np.zeros([n_TOUR, nZones, 2]) # [Non,Mot]
NLS_car = np.zeros([n_TOUR, nZones, ])
NLS_non = np.zeros([n_TOUR, nZones, ])
NLS_mot = np.zeros([n_TOUR, nZones, ])
MLS_top = np.zeros([n_TOUR, nZones, ]) # Mode choice logsum
DLS_top = np.zeros([n_TOUR, ]) # Dest choice logsum
Pr_modes = np.zeros([n_TOUR, nZones, nModes])
Pr_dest = np.zeros([n_TOUR, nZones])
# -inf utilities produce exp() -> 0 and log(0) warnings; suppress them,
# then zero out the resulting NaN probabilities at the end.
with np.errstate(divide='ignore', invalid='ignore'):
for n in range(n_TOUR):
NLS_car[n, :] = paramMUcar * log(np.exp(Util[n, :, mDA] / paramMUcar) + exp(Util[n, :, mSR] / paramMUcar))
NLS_non[n, :] = paramMUnon * log(np.exp(Util[n, :, mWA] / paramMUnon) + exp(Util[n, :, mBI] / paramMUnon))
NLS_mot[n, :] = paramMUmot * log(np.exp(NLS_car[n, :] / paramMUmot) + exp(Util[n, :, mTR] / paramMUmot))
MLS_top[n, :] = log(exp(NLS_non[n, :]) + exp(NLS_mot[n, :]))
DLS_top[n] = log(np.sum(exp(MLS_top[n, :])))
Pr_dest[n, :] = exp(MLS_top[n, :] - DLS_top[n])
CPr_top[n, :, 0] = exp((NLS_non[n, :] - MLS_top[n, :]) / paramMUtop)
CPr_top[n, :, 1] = exp((NLS_mot[n, :] - MLS_top[n, :]) / paramMUtop)
CPr_mot[n, :, 0] = exp((Util[n, :, mTR] - NLS_mot[n, :]) / paramMUmot)
CPr_mot[n, :, 1] = exp((NLS_car[n, :] - NLS_mot[n, :]) / paramMUmot)
CPr_non[n, :, 0] = exp((Util[n, :, mWA] - NLS_non[n, :]) / paramMUnon)
CPr_non[n, :, 1] = exp((Util[n, :, mBI] - NLS_non[n, :]) / paramMUnon)
CPr_car[n, :, 0] = exp((Util[n, :, mDA] - NLS_car[n, :]) / paramMUcar)
CPr_car[n, :, 1] = exp((Util[n, :, mSR] - NLS_car[n, :]) / paramMUcar)
# Joint (destination, mode) probability = product of the conditional
# probabilities down the nest chain times the destination probability.
Pr_modes[n, :, mTR] = CPr_mot[n, :, 0] * CPr_top[n, :, 1] * Pr_dest[n, :]
Pr_modes[n, :, mWA] = CPr_non[n, :, 0] * CPr_top[n, :, 0] * Pr_dest[n, :]
Pr_modes[n, :, mBI] = CPr_non[n, :, 1] * CPr_top[n, :, 0] * Pr_dest[n, :]
Pr_modes[n, :, mDA] = CPr_car[n, :, 0] * CPr_mot[n, :, 1] * CPr_top[n, :, 1] * Pr_dest[n, :]
Pr_modes[n, :, mSR] = CPr_car[n, :, 1] * CPr_mot[n, :, 1] * CPr_top[n, :, 1] * Pr_dest[n, :]
Pr_modes[np.isnan(Pr_modes)] = 0
In [84]:
## Choices
# Draw a joint (destination, mode) choice for each tour from the flattened
# probability array, then decode the draw back into 1-based codes.
for n in range(n_TOUR):
    try:
        ch = np.random.choice(nModes * nZones, replace=True, p=Pr_modes[n, :, :].ravel())
    except ValueError:
        # np.random.choice raises ValueError when the probabilities do not
        # sum to 1; report the total before re-raising for diagnosis.
        # Fix: the original passed "{}" and the value as two separate print
        # arguments instead of formatting the value into the message.
        print("total prob = {}".format(Pr_modes[n, :, :].sum()))
        raise
    dtazi = ch // nModes
    modei = ch - (dtazi * nModes)
    TOURdtaz[n] = dtazi + 1
    TOURmode[n] = modei + 1
In [85]:
# Assemble the tour table from the synthesized arrays.
f_tour = pd.DataFrame.from_dict(
dict([
('TOURID', TOURid),
('HHID', TOURhh),
('PERSONID', TOURper),
('DTAZ', TOURdtaz),
('TOURMODE', TOURmode),
('TOURPURP', TOURpurpose),
('N_STOPS', TOURstops),
])
)
# f_tour_filename = os.path.join(directory, 'exampville_tours.csv')
# f_tour.to_csv(f_tour_filename)
In [86]:
f_tour.set_index('TOURID', inplace=True)
In [ ]:
In [87]:
# Mode codes, redefined identically to the earlier cell.
DA = 1
SR = 2
Walk = 3
Bike = 4
Transit = 5
In [88]:
# Larch idco-format data container for the simulated mode choices.
dfs = larch.DataFrames(
co=f_tour,
alt_codes=[DA,SR,Walk,Bike,Transit],
alt_names=['DA','SR','Walk','Bike','Transit'],
ch_name='TOURMODE',
)
In [89]:
dfs.data_ch
Out[89]:
In [90]:
dfs.choice_avail_summary()
Out[90]:
In [91]:
# Trips Per Person
# Each tour makes 2 trips (out and back) plus one per intermediate stop.
f_tour['N_TRIPS'] = 2+f_tour['N_STOPS']
In [92]:
# Trip purposes.  A work tour contributes 2 home-based-work trips, minus 1
# if it has stops (the stop breaks one leg).  Note the HBO expression relies
# on `-` binding tighter than `==`: it tests whether the HBW count equals 1
# (i.e., a work tour with stops contributes 1 home-based-other trip), then
# adds 2 for each non-work tour.  Remaining trips are non-home-based.
f_tour['N_TRIPS_HBW'] = (f_tour.TOURPURP==1)*2 - ((f_tour.TOURPURP==1)&(f_tour.N_STOPS>0))
f_tour['N_TRIPS_HBO'] = ((f_tour.TOURPURP==1)*2 - ((f_tour.TOURPURP==1)&(f_tour.N_STOPS>0))==1) + (f_tour.TOURPURP==2)*2
f_tour['N_TRIPS_NHB'] = f_tour['N_TRIPS'] - f_tour['N_TRIPS_HBW'] - f_tour['N_TRIPS_HBO']
In [93]:
f_tour
Out[93]:
In [94]:
# Aggregate trip counts from tours up to persons (aligned by PERSONID).
# Persons with no tours get NaN from the groupby alignment, hence fillna(0).
PER.index = PER.PERSONID
for ttype in [
'N_TRIPS',
'N_TRIPS_HBW',
'N_TRIPS_HBO',
'N_TRIPS_NHB',
]:
PER[ttype] = f_tour.groupby('PERSONID')[ttype].sum()
PER[ttype] = PER[ttype].fillna(0).astype(int)
In [95]:
PER
Out[95]:
In [96]:
# Aggregate person trip counts up to households (aligned by HHID).
HH.index = HH.HHID
for ttype in [
'N_TRIPS',
'N_TRIPS_HBW',
'N_TRIPS_HBO',
'N_TRIPS_NHB',
]:
HH[ttype] = PER.groupby('HHID')[ttype].sum()
HH[ttype] = HH[ttype].fillna(0).astype(int)
In [97]:
HH['N_WORKERS'] = PER.groupby('HHID').WORKS.sum()
HH['N_WORKERS'] = HH.N_WORKERS.fillna(0).astype(int)
In [98]:
HH['N_WORKERS'].statistics(discrete=True)
Out[98]:
In [99]:
PER.info()
In [100]:
# 2-D histogram of work tours by household vehicle count; the -0.5 shift
# centers the integer vehicle counts within their bins.
w, b1, b2 = np.histogram2d(
PER.N_VEHICLES,
PER.N_WORK_TOURS,
bins=[
np.array([0,1,2,3,4,5,6,7])-0.5,
[0,1,2,3,4,5,6,7,8,9,10,12,14,16],
],
)
In [101]:
# Counts as a frame, labeled by the left bin edges.
# NOTE(review): only the left edge `i` is used; `j` is unused in the f-string.
ww = pd.DataFrame(
w,
index=[0,1,2,3,4,5,6],
columns=[f"{i}" for i,j in zip(b2[:-1], b2[1:])],
)
# Row-normalize: share of work-tour counts within each vehicle-ownership level.
ww.T.div(ww.T.sum(0)).T
Out[101]:
In [102]:
# Mid-notebook import; conventionally this would live in the imports cell.
import seaborn as sns
sns.heatmap(
data=pd.DataFrame(
w,
index=[0,1,2,3,4,5,6],
columns=[f"{i}" for i,j in zip(b2[:-1], b2[1:])],
),
annot=True,
fmt=".0f",
)
Out[102]:
In [103]:
# Zonal means of household attributes (MEAN_-prefixed columns).
hh_means = HH.groupby('HOMETAZ')[['INCOME', 'N_VEHICLES', 'HHSIZE',
'N_TRIPS', 'N_TRIPS_HBW', 'N_TRIPS_HBO', 'N_TRIPS_NHB',
'N_WORKERS']].mean().add_prefix('MEAN_')
In [104]:
# Zonal totals of the same attributes.
hh_sums = HH.groupby('HOMETAZ')[['N_VEHICLES', 'HHSIZE',
'N_TRIPS', 'N_TRIPS_HBW', 'N_TRIPS_HBO', 'N_TRIPS_NHB',
'N_WORKERS']].sum()
In [105]:
hh_sums.rename(columns={
'HHSIZE':"TOTAL_POP",
'N_WORKERS':"TOTAL_WORKERS",
'N_VEHICLES':"TOTAL_VEHICLES",
'N_TRIPS':"TOTAL_TRIPS",
'N_TRIPS_HBW':'TOTAL_TRIPS_HBW',
'N_TRIPS_HBO':"TOTAL_TRIPS_HBO",
'N_TRIPS_NHB':"TOTAL_TRIPS_NHB",
}, inplace=True)
In [106]:
taz_agg = pd.concat([hh_means, hh_sums], axis=1)
In [107]:
taz_agg['N_HOUSEHOLDS'] = HH.groupby('HOMETAZ').size()
In [108]:
taz_agg.index.name = "TAZ"
In [109]:
# Drop income and n_veh from persons, they are hh attributes for our demos
PER.drop(['N_VEHICLES','INCOME'], axis=1, inplace=True)
In [110]:
# Write the four synthetic tables to the seed-specific exampville file set.
HH.to_csv(larch.exampville._files(seed).hh, index=False)
PER.to_csv(larch.exampville._files(seed).person, index=False)
f_tour.to_csv(larch.exampville._files(seed).tour)
taz_agg.to_csv(larch.exampville._files(seed).demographics)
In [111]:
larch.exampville._files(seed).demographics
Out[111]:
In [112]:
os.getcwd()
Out[112]:
In [ ]: