In [1]:
import numpy as np, pandas as pd, os
from synthicity.utils import misc
from drcog.models import regression_model_estimation, choice_model_estimation, dataset
# Open the DRCOG HDF5 store that lives in the project data directory.
drcog_h5_path = os.path.join(misc.data_dir(), 'drcog.h5')
dset = dataset.DRCOGDataset(drcog_h5_path)

# Fixed seed so the np.random draws made later in this notebook are repeatable.
np.random.seed(1)

## Variable Library
# Let the project's variable library compute its derived variables on the dataset.
from drcog.variables import variable_library
variable_library.calculate_variables(dset)
# Building attributes kept for the point tables built below.
building_columns = [
    'building_type_id', 'improvement_value', 'land_area',
    'non_residential_sqft', 'parcel_id', 'residential_units',
    'sqft_per_unit', 'stories', 'tax_exempt', 'year_built', 'bldg_sq_ft',
    'unit_price_non_residential', 'unit_price_residential',
    'building_sqft_per_job', 'non_residential_units', 'base_year_jobs',
    'all_units',
]
buildings = dset.fetch('buildings')[building_columns]

# Establishments and households lose their own zone/county columns before
# they are merged with buildings/parcels.
# NOTE(review): presumably this avoids duplicate zone_id/county_id columns
# after the merges below -- confirm.
establishments = dset.fetch('establishments')
for dropped_col in ('zone_id', 'county_id'):
    del establishments[dropped_col]

households = dset.fetch('households')
for dropped_col in ('zone_id', 'county_id'):
    del households[dropped_col]

# Parcels, tagged with the urban center id from the parcels_urbancen table
# (aligned on parcel_id).
parcels = dset.fetch('parcels')
parcels_urbancen = dset.store.parcels_urbancen.set_index('parcel_id')
parcels['urbancenter_id'] = parcels_urbancen.urban_cen

zones = dset.fetch('zones')

# Parcel + zone attributes, indexed by parcel_id (left join keeps every parcel).
pz = pd.merge(
    parcels.reset_index(),
    zones,
    left_on='zone_id',
    right_index=True,
    how='left',
).set_index('parcel_id')

# Building + parcel + zone attributes; must be built before `buildings` is
# replaced by the buildings/parcels merge below.
bpz = buildings.merge(pz, left_on='parcel_id', right_index=True)

## Merge buildings and parcels
buildings = buildings.merge(parcels, left_on='parcel_id', right_index=True)

## Merge households with buildings/parcels
households = households.merge(buildings, left_on='building_id', right_index=True)

## Merge establishments with buildings/parcels
establishments = establishments.merge(buildings, left_on='building_id', right_index=True)
##### Export household points
# One row per household, carrying only the id of the building it occupies;
# every other column is looked up in `bpz` by that building id.
hh = households[['building_id']].reset_index()
hh_building_ids = hh.building_id

for col in ('parcel_id', 'urbancenter_id'):
    hh[col] = bpz[col][hh_building_ids].values

# Coordinates are stored as 64-bit integers.
for col in ('x', 'y'):
    hh[col] = bpz[col][hh_building_ids].values.astype('int64')

hh['taz05_id'] = bpz['external_zone_id'][hh_building_ids].values

# Distance to the closer of rail and bus, divided by 5280.
# NOTE(review): presumably a feet -> miles conversion -- confirm the units of
# dist_rail / dist_bus.
nearest_transit = np.minimum(
    bpz['dist_rail'][hh_building_ids].values,
    bpz['dist_bus'][hh_building_ids].values,
)
hh['dist_trans'] = nearest_transit / 5280.0
# Parcels with parcel_sqft >= 435,600 (10 * 43,560, i.e. ten acres if the
# column is in square feet) are treated as "big": points on them are
# scattered over the parcel instead of all sharing one coordinate.
big_parcels = parcels.index.values[parcels.parcel_sqft >= 435600]
big_parcel_ids_with_hh = np.unique(hh.parcel_id[np.in1d(hh.parcel_id, big_parcels)].values)

# Candidate coordinates per parcel, cast to integers to match hh.x / hh.y.
parcel_coords = dset.parcel_coords
parcel_coords['x'] = parcel_coords.x.astype('int64')
parcel_coords['y'] = parcel_coords.y.astype('int64')

for parcel_id in big_parcel_ids_with_hh:
    # Boolean mask over hh rows located on this parcel.
    idx_hh_on_parcel = (hh.parcel_id == parcel_id).values
    coords = parcel_coords[parcel_coords.parcel_id == parcel_id]
    if len(coords) == 0:
        # No candidate coordinates for this parcel: keep the coordinate the
        # households already have rather than failing in np.random.choice.
        continue
    # Draw one coordinate row (with replacement) for every household on the parcel.
    idx_coord = np.random.choice(coords.index, size=idx_hh_on_parcel.sum(), replace=True)
    # Assign through .loc on the frame itself; `hh.x[mask] = ...` is chained
    # assignment and may write to a temporary copy instead of `hh`.
    hh.loc[idx_hh_on_parcel, 'x'] = coords.x.loc[idx_coord].values
    hh.loc[idx_hh_on_parcel, 'y'] = coords.y.loc[idx_coord].values
In [2]:
# Summary statistics of the household point table, displayed as the cell output.
hh.describe()
Out[2]:
In [ ]:
# Expand establishments into one row per job: an establishment with N
# employees contributes N identical job records.
e = establishments.reset_index()

# Rows to emit per establishment. Negative counts are clipped to zero, which
# matches iterating over range(n) for n < 0 (no rows).
jobs_per_establishment = np.maximum(e.employees.values, 0)

# Built fresh on every run, so the cell works on a clean kernel and does not
# accumulate duplicates when re-executed.
bids = np.repeat(e.building_id.values, jobs_per_establishment)
# NOTE(review): this is the row position in `e` after reset_index(), not the
# establishments' original index -- confirm that is the intended id.
eids = np.repeat(e.index.values, jobs_per_establishment)
hbs = np.repeat(e.home_based_status.values, jobs_per_establishment)
sids = np.repeat(e.sector_id.values, jobs_per_establishment)

# Sanity check: all four columns must have the same length.
print(len(bids))
print(len(eids))
print(len(hbs))
print(len(sids))

jobs = pd.DataFrame({
    'job_id': np.arange(1, len(bids) + 1),
    'building_id': bids,
    'establishment_id': eids,
    'home_based_status': hbs,
    'sector_id': sids,
})
# Attach parcel-level attributes to every job by looking up its building in `bpz`.
job_building_ids = jobs.building_id

for col in ('parcel_id', 'urbancenter_id'):
    jobs[col] = bpz[col][job_building_ids].values

# Job coordinates come from the centroid columns, stored as 64-bit integers.
# NOTE(review): the household table reads bpz.x / bpz.y instead of
# centroid_x / centroid_y -- confirm the difference is intentional.
for target_col, source_col in (('x', 'centroid_x'), ('y', 'centroid_y')):
    jobs[target_col] = bpz[source_col][job_building_ids].values.astype('int64')
# Big parcels (ids in `big_parcels`) that hold at least one job.
big_parcel_ids_with_jobs = np.unique(jobs.parcel_id[np.in1d(jobs.parcel_id, big_parcels)].values)

for parcel_id in big_parcel_ids_with_jobs:
    # Boolean mask over job rows located on this parcel.
    idx_jobs_on_parcel = (jobs.parcel_id == parcel_id).values
    coords = parcel_coords[parcel_coords.parcel_id == parcel_id]
    if len(coords) == 0:
        # No candidate coordinates for this parcel: keep the coordinate the
        # jobs already have rather than failing in np.random.choice.
        continue
    # Draw one coordinate row (with replacement) for every job on the parcel.
    idx_coord = np.random.choice(coords.index, size=idx_jobs_on_parcel.sum(), replace=True)
    # Assign through .loc on the frame itself; `jobs.x[mask] = ...` is chained
    # assignment and may write to a temporary copy instead of `jobs`.
    jobs.loc[idx_jobs_on_parcel, 'x'] = coords.x.loc[idx_coord].values
    jobs.loc[idx_jobs_on_parcel, 'y'] = coords.y.loc[idx_coord].values
In [ ]:
# Summary statistics of the job point table, displayed as the cell output.
jobs.describe()