In [1]:
import numpy as np, pandas as pd, os
from synthicity.utils import misc
from drcog.models import regression_model_estimation, choice_model_estimation, dataset
# Open the DRCOG HDF5 store located in the synthicity data directory.
drcog_store_path = os.path.join(misc.data_dir(), 'drcog.h5')
dset = dataset.DRCOGDataset(drcog_store_path)
# Seed NumPy's global generator so the np.random.choice draws further down
# are repeatable from a fresh kernel.
np.random.seed(1)
## Variable library
from drcog.variables import variable_library
# NOTE(review): presumably adds derived columns to the tables held by dset --
# confirm against drcog.variables.variable_library.
variable_library.calculate_variables(dset)
# Building attributes that are carried into every merged table below.
building_columns = [
    'building_type_id', 'improvement_value', 'land_area',
    'non_residential_sqft', 'parcel_id', 'residential_units',
    'sqft_per_unit', 'stories', 'tax_exempt', 'year_built', 'bldg_sq_ft',
    'unit_price_non_residential', 'unit_price_residential',
    'building_sqft_per_job', 'non_residential_units', 'base_year_jobs',
    'all_units',
]
buildings = dset.fetch('buildings')[building_columns]

# zone_id / county_id are removed in place from the fetched agent tables
# before they are merged with the building/parcel attributes.
establishments = dset.fetch('establishments')
for geography_column in ('zone_id', 'county_id'):
    del establishments[geography_column]
households = dset.fetch('households')
for geography_column in ('zone_id', 'county_id'):
    del households[geography_column]

# Attach the urban-center code to each parcel (aligned on parcel_id).
parcels = dset.fetch('parcels')
parcels_urbancen = dset.store.parcels_urbancen.set_index('parcel_id')
parcels['urbancenter_id'] = parcels_urbancen['urban_cen']

# pz: parcels with their zone attributes, indexed by parcel_id.
zones = dset.fetch('zones')
pz = pd.merge(
    parcels.reset_index(), zones,
    how='left', left_on='zone_id', right_index=True,
).set_index('parcel_id')
# bpz: buildings with parcel + zone attributes, indexed like `buildings`.
bpz = buildings.merge(pz, left_on='parcel_id', right_index=True)

## Merge buildings and parcels
buildings = buildings.merge(parcels, left_on='parcel_id', right_index=True)
## Merge households with buildings/parcels
households = households.merge(buildings, left_on='building_id', right_index=True)
## Merge establishments with buildings/parcels
establishments = establishments.merge(buildings, left_on='building_id', right_index=True)
##### Export jobs table
# Expand establishments into one row per job: every establishment row is
# repeated `employees` times. np.repeat replaces the original per-employee
# Python loop that appended to four parallel lists.
e = establishments.reset_index()
# Negative head counts contribute no rows (range() in the original loop
# behaved the same way; np.repeat would raise on a negative count).
job_counts = np.maximum(e.employees.values, 0)
bids = np.repeat(e.building_id.values, job_counts)
# NOTE(review): after reset_index() this is the positional row number, not
# the original establishments index. It is dropped again before export, so
# the output is unaffected -- confirm that is intended.
eids = np.repeat(e.index.values, job_counts)
hbs = np.repeat(e.home_based_status.values, job_counts)
sids = np.repeat(e.sector_id.values, job_counts)
print(len(bids))
print(len(eids))
print(len(hbs))
print(len(sids))
jobs = pd.DataFrame({'job_id':range(1,len(bids)+1),'building_id':bids,'establishment_id':eids,'home_based_status':hbs,'sector_id':sids})

# Location attributes of each job are looked up from its building's row in bpz.
job_building_ids = jobs.building_id
for job_column, bpz_column in [
    ('parcel_id', 'parcel_id'),
    ('urbancenter_id', 'urbancenter_id'),
    ('x', 'centroid_x'),
    ('y', 'centroid_y'),
    ('taz05_id', 'external_zone_id'),
]:
    jobs[job_column] = bpz[bpz_column][job_building_ids].values

# Collapse sector ids into six named job types. The sector lists are mutually
# exclusive; any sector not listed keeps the empty-string default.
job_type_sectors = [
    ('Education', [61]),
    ('Entertainment', [71]),
    ('Production', [11, 21, 22, 23, 31, 32, 33, 42, 48, 49]),
    ('Restaurant', [7221, 7222, 7224]),
    ('Retail', [44, 45, 7211, 7212, 7213, 7223]),
    ('Service', [51, 52, 53, 54, 55, 56, 62, 81, 92]),
]
jobs['jobtypename'] = ''
for job_type_name, sector_ids in job_type_sectors:
    # .loc writes to `jobs` itself; the original chained form
    # (jobs.jobtypename[mask] = ...) may write to a temporary copy.
    jobs.loc[jobs.sector_id.isin(sector_ids), 'jobtypename'] = job_type_name

# Keep only the exported columns; job_id is exported under the name tempid.
jobs = jobs.drop(['building_id', 'establishment_id', 'home_based_status', 'sector_id'], axis=1)
jobs = jobs.rename(columns={'job_id': 'tempid'})
#jobs.to_csv(tm_input_dir+'\\jobs%s.csv'%sim_year,index=False)
##### Export household points
# One row per household, carrying the id of the building it lives in.
hh = households[['building_id']].reset_index()
# Location attributes are read from the household's building row in bpz.
for hh_column, bpz_column in [
    ('parcel_id', 'parcel_id'),
    ('urbancenter_id', 'urbancenter_id'),
    ('x', 'centroid_x'),
    ('y', 'centroid_y'),
    ('taz05_id', 'external_zone_id'),
]:
    hh[hh_column] = bpz[bpz_column][hh['building_id']].values
# Distance to the nearer of rail and bus, divided by 5280.0
# (presumably feet -> miles; TODO confirm the units of dist_rail / dist_bus).
nearest_transit = np.minimum(
    bpz['dist_rail'][hh['building_id']].values,
    bpz['dist_bus'][hh['building_id']].values,
)
hh['dist_trans'] = nearest_transit / 5280.0
In [2]:
# Candidate points per parcel; later cells read its parcel_id, x and y columns.
# NOTE(review): unclear from here whether dset.parcel_coords is cached or
# re-read on each access -- the next cell modifies this object in place.
parcel_coords = dset.parcel_coords
In [3]:
# Store both coordinate columns as 64-bit integers.
for axis_name in ('x', 'y'):
    parcel_coords[axis_name] = parcel_coords[axis_name].astype('int64')
In [4]:
# Ids of parcels at or above the size cutoff. 435600 = 10 * 43,560, i.e. ten
# acres if parcel_sqft is in square feet (TODO confirm units).
is_big_parcel = parcels['parcel_sqft'].values >= 435600
big_parcels = parcels.index.values[is_big_parcel]
In [5]:
# Distinct (sorted) ids of big parcels that hold at least one household.
hh_on_big_parcel = np.in1d(hh['parcel_id'], big_parcels)
big_parcel_ids_with_hh = np.unique(hh['parcel_id'][hh_on_big_parcel].values)
In [6]:
# Number of distinct big parcels with households (np.unique returns a 1-D array).
big_parcel_ids_with_hh.shape[0]
Out[6]:
In [7]:
# Move households on big parcels off the parcel centroid: each household gets
# the x/y of a point drawn at random (with replacement) from that parcel's
# rows in parcel_coords.
# Cap kept from the original cell, which stopped after 20 parcels; set to
# None to process every big parcel.
max_parcels = 20
for parcel_id in big_parcel_ids_with_hh[:max_parcels]:
    idx_hh_on_parcel = (hh.parcel_id == parcel_id).values
    coords = parcel_coords[parcel_coords.parcel_id == parcel_id]
    if len(coords) == 0:
        # No candidate points for this parcel (np.random.choice would raise
        # on an empty pool); the households keep their current coordinates.
        continue
    idx_coord = np.random.choice(coords.index, size=idx_hh_on_parcel.sum(), replace=True)
    x = coords.x.loc[idx_coord].values
    y = coords.y.loc[idx_coord].values
    # Before/after print of the affected x values, as in the original cell.
    print(hh.x[idx_hh_on_parcel])
    # .loc writes into hh itself; the chained form hh.x[mask] = ... can end up
    # assigning to a temporary copy.
    hh.loc[idx_hh_on_parcel, 'x'] = x
    hh.loc[idx_hh_on_parcel, 'y'] = y
    print(hh.x[idx_hh_on_parcel])
In [8]:
# Distinct (sorted) ids of big parcels that hold at least one job.
jobs_on_big_parcel = np.in1d(jobs['parcel_id'], big_parcels)
big_parcel_ids_with_jobs = np.unique(jobs['parcel_id'][jobs_on_big_parcel].values)
print(len(big_parcel_ids_with_jobs))
In [9]:
# Move jobs on big parcels off the parcel centroid: each job gets the x/y of
# a point drawn at random (with replacement) from that parcel's rows in
# parcel_coords.
# Cap kept from the original cell, which stopped after 20 parcels; set to
# None to process every big parcel.
max_parcels = 20
for parcel_id in big_parcel_ids_with_jobs[:max_parcels]:
    idx_jobs_on_parcel = (jobs.parcel_id == parcel_id).values
    coords = parcel_coords[parcel_coords.parcel_id == parcel_id]
    if len(coords) == 0:
        # No candidate points for this parcel (np.random.choice would raise
        # on an empty pool); the jobs keep their current coordinates.
        continue
    idx_coord = np.random.choice(coords.index, size=idx_jobs_on_parcel.sum(), replace=True)
    x = coords.x.loc[idx_coord].values
    y = coords.y.loc[idx_coord].values
    # Before/after print of the affected x values, as in the original cell.
    print(jobs.x[idx_jobs_on_parcel])
    # .loc writes into jobs itself; the chained form jobs.x[mask] = ... can
    # end up assigning to a temporary copy.
    jobs.loc[idx_jobs_on_parcel, 'x'] = x
    jobs.loc[idx_jobs_on_parcel, 'y'] = y
    print(jobs.x[idx_jobs_on_parcel])
In [8]:
# dtype of the household x column after the coordinate reassignment.
hh['x'].dtype
Out[8]:
In [9]:
# dtype of the sampled coordinates; `coords` is whatever the last executed
# scatter loop left behind, so this cell depends on that loop having run.
coords['x'].dtype
Out[9]:
In [12]:
# dtype of the job x column after the coordinate reassignment.
jobs['x'].dtype
Out[12]: