In [1]:
import pandas as pd
import numpy as np
# Sanity-check the 2015 travel-model inputs: count suspicious coordinates and
# zone ids in the housing-unit file, then confirm that the synthetic household
# and synthetic person files refer to the same set of household ids.
# NOTE(review): absolute Windows path kept from the original notebook; consider
# moving it to a single configuration cell.
travel_model_dir = 'C:\\urbansim\\data\\travel_model\\2015\\'

hh = pd.read_csv(travel_model_dir + 'housing_units2015.csv')
# Number of housing units with x < 1, with y < 1, and with a negative taz05_id.
print((hh.x < 1).sum())
print((hh.y < 1).sum())
print((hh.taz05_id < 0).sum())

synth_hh = pd.read_csv(travel_model_dir + 'SynHH2015.csv')
synth_p = pd.read_csv(travel_model_dir + 'SynPers2015.csv')

# Distinct household ids seen in each file.
hh_ids = np.unique(synth_hh.hhid.values)
person_hh_ids = np.unique(synth_p.hhid.values)
print(len(hh_ids))
print(len(person_hh_ids))

# Number of household ids that appear in exactly one of the two files
# (0 means the files agree). np.unique returns a plain ndarray, so the former
# `.values` access failed, and a positional `!=` is only meaningful when both
# id arrays have the same length; the set difference works in every case.
print(len(np.setxor1d(hh_ids, person_hh_ids)))
In [1]:
import numpy as np, pandas as pd, os
from synthicity.utils import misc
from drcog.models import regression_model_estimation, choice_model_estimation, dataset
# Open the 'drcog.h5' store located in the project data directory.
h5_path = os.path.join(misc.data_dir(), 'drcog.h5')
dset = dataset.DRCOGDataset(h5_path)

# Seed numpy's global RNG so the np.random.choice draws further down in this
# cell are repeatable.
np.random.seed(1)

## Variable library
from drcog.variables import variable_library
variable_library.calculate_variables(dset)
# Build `bpz`: the selected building attributes joined to their parcel's
# attributes and, through the parcel's zone_id, to the zone attributes.
building_columns = [
    'building_type_id', 'improvement_value', 'land_area',
    'non_residential_sqft', 'parcel_id', 'residential_units',
    'sqft_per_unit', 'stories', 'tax_exempt', 'year_built', 'bldg_sq_ft',
    'unit_price_non_residential', 'unit_price_residential',
    'building_sqft_per_job', 'non_residential_units', 'base_year_jobs',
    'all_units',
]
buildings = dset.fetch('buildings')[building_columns]

parcels = dset.fetch('parcels')
# Attach the urban-center id from the store's parcels_urbancen table; the
# assignment aligns on the parcel index.
parcels_urbancen = dset.store.parcels_urbancen.set_index('parcel_id')
parcels['urbancenter_id'] = parcels_urbancen.urban_cen

zones = dset.fetch('zones')

# Parcel + zone attributes. Left join, so parcels without a matching zone row
# are kept. NOTE(review): reset_index()/set_index('parcel_id') assumes the
# parcels index is named 'parcel_id' -- confirm.
pz = pd.merge(parcels.reset_index(), zones,
              left_on='zone_id', right_index=True, how='left')
pz = pz.set_index('parcel_id')

# Building + parcel + zone attributes. pd.merge defaults to an inner join, so
# buildings whose parcel_id is absent from `pz` are dropped here.
bpz = pd.merge(buildings, pz, left_on='parcel_id', right_index=True)

# Parenthesised so the statement is valid on Python 2 and Python 3 alike.
print(bpz[['x', 'y']].describe())
establishments = dset.fetch('establishments')
# Remove zone_id and county_id from the establishment table (in place, as
# before) ahead of the merge with buildings.
for unwanted_column in ('zone_id', 'county_id'):
    del establishments[unwanted_column]

## Merge establishments with buildings/parcels
# Inner join (pd.merge default): establishments whose building_id is not in
# the buildings index are dropped.
establishments = pd.merge(establishments, buildings,
                          left_on='building_id', right_index=True)

## Available parcel coordinates (includes random x,y for big parcels)
parcel_coords = dset.parcel_coords
parcel_coords['x'] = parcel_coords['x'].astype('int64')
parcel_coords['y'] = parcel_coords['y'].astype('int64')

# Ids of parcels of at least 435600 sq ft (10 acres at 43560 sq ft per acre).
is_big_parcel = (parcels.parcel_sqft >= 435600).values
big_parcels = parcels.index.values[is_big_parcel]

# Parenthesised so the statement is valid on Python 2 and Python 3 alike.
print(parcel_coords[['x', 'y']].describe())
# Expand the establishment table into one row per job: an establishment with
# `employees` == k contributes k job rows carrying its building_id,
# home_based_status and sector_id. Vectorised with np.repeat instead of a
# Python-level loop over every individual job.
e = establishments.reset_index()

# Rows to emit per establishment. Non-positive counts emit nothing, which is
# what range() did in the loop this replaces.
jobs_per_establishment = np.maximum(e.employees.values, 0)

# For each job, the row position in `e` of the establishment it belongs to.
# int64 is requested explicitly because numpy's default integer is 32-bit on
# some platforms.
row_pos = np.repeat(np.arange(len(e), dtype='int64'), jobs_per_establishment)

bids = e.building_id.values[row_pos]
# NOTE(review): as in the original loop, this is the row position in `e`
# (after reset_index), not the establishment's original index value. The
# column is deleted again before the end of this cell, so nothing downstream
# in the cell depends on it -- confirm before reusing it elsewhere.
eids = row_pos
hbs = e.home_based_status.values[row_pos]
sids = e.sector_id.values[row_pos]

jobs = pd.DataFrame({
    'job_id': np.arange(1, len(row_pos) + 1, dtype='int64'),  # 1-based id
    'building_id': bids,
    'establishment_id': eids,
    'home_based_status': hbs,
    'sector_id': sids,
})
# Copy location attributes from each job's building onto the job row.
# `.loc` makes the label-based lookup explicit.
# NOTE(review): assumes `bpz` is indexed by building id -- confirm.
job_building_ids = jobs.building_id.values
jobs['parcel_id'] = bpz.parcel_id.loc[job_building_ids].values
jobs['urbancenter_id'] = bpz.urbancenter_id.loc[job_building_ids].values
jobs['x'] = bpz.x.loc[job_building_ids].values.astype('int64')
jobs['y'] = bpz.y.loc[job_building_ids].values.astype('int64')
# Parenthesised so the statement is valid on Python 2 and Python 3 alike.
print(jobs[['x', 'y']].describe())
jobs['taz05_id'] = bpz.external_zone_id.loc[job_building_ids].values

# Collapse sector_id into six groups, coded 1..6. The code lists are disjoint,
# so at most one term is non-zero per job; a sector_id that appears in none of
# them gets 0.
sector = jobs.sector_id
jobs['sector_id_six'] = (
    1 * (sector == 61)
    + 2 * (sector == 71)
    + 3 * sector.isin([11, 21, 22, 23, 31, 32, 33, 42, 48, 49])
    + 4 * sector.isin([7221, 7222, 7224])
    + 5 * sector.isin([44, 45, 7211, 7212, 7213, 7223])
    + 6 * sector.isin([51, 52, 53, 54, 55, 56, 62, 81, 92])
)

# Name each group; group 0 keeps the empty string. A single map() replaces
# the chained assignments (`jobs.jobtypename[mask] = ...`), which pandas does
# not guarantee will write back to `jobs`.
job_type_names = {
    1: 'Education',
    2: 'Entertainment',
    3: 'Production',
    4: 'Restaurant',
    5: 'Retail',
    6: 'Service',
}
jobs['jobtypename'] = jobs.sector_id_six.map(job_type_names).fillna('')
# For jobs located on a big parcel, replace the x/y copied from the building
# with coordinates sampled (with replacement) from that parcel's rows in
# `parcel_coords`, one draw per job.
on_big_parcel = jobs.parcel_id.isin(big_parcels)
big_parcel_ids_with_jobs = np.unique(jobs.parcel_id[on_big_parcel].values)

for parcel_id in big_parcel_ids_with_jobs:
    idx_jobs_on_parcel = (jobs.parcel_id == parcel_id).values
    coords = parcel_coords[parcel_coords.parcel_id == parcel_id]
    # Draws from numpy's global RNG. NOTE(review): np.random.choice raises
    # ValueError if a parcel has no rows in parcel_coords.
    idx_coord = np.random.choice(coords.index,
                                 size=idx_jobs_on_parcel.sum(),
                                 replace=True)
    x = coords.x.loc[idx_coord].values
    y = coords.y.loc[idx_coord].values
    # Single-step .loc assignment: the chained form `jobs.x[mask] = x` is not
    # guaranteed to write back to `jobs`.
    jobs.loc[idx_jobs_on_parcel, 'x'] = x
    jobs.loc[idx_jobs_on_parcel, 'y'] = y
# Drop the working columns in place, leaving the job id, parcel_id,
# urbancenter_id, x, y, taz05_id and jobtypename, then rename the job id
# column to 'tempid'.
working_columns = ['sector_id_six', 'building_id', 'establishment_id',
                   'home_based_status', 'sector_id']
jobs.drop(working_columns, axis=1, inplace=True)
jobs.rename(columns={'job_id': 'tempid'}, inplace=True)
In [2]:
# Summary statistics of the final job coordinates.
jobs[['x','y']].describe()
Out[2]:
In [1]:
import numpy as np, pandas as pd, os
from synthicity.utils import misc
from drcog.models import regression_model_estimation, choice_model_estimation, dataset, refiner
# Open the 'drcog.h5' store located in the project data directory.
h5_path = os.path.join(misc.data_dir(), 'drcog.h5')
dset = dataset.DRCOGDataset(h5_path)

# Run the variable library on the dataset before refining.
from drcog.variables import variable_library
variable_library.calculate_variables(dset)

# Run the refiner for a single simulation year.
sim_year = 2013
refiner.run(dset, sim_year)
In [5]:
# Count parcels whose x coordinate is null.
dset.parcels.x.isnull().sum()
Out[5]:
In [7]:
# Summary statistics of the parcel x/y columns.
dset.parcels[['x','y']].describe()
Out[7]:
In [8]:
# Short alias for the zones table, used by the checks in the next cell.
z = dset.zones
In [12]:
# Inspect the zone centroid columns: summary statistics first, then the
# number of null values in each. Parenthesised print calls are valid on
# Python 2 and Python 3 alike.
print(z.zonecentroid_x.describe())
print(z.zonecentroid_y.describe())
print(z.zonecentroid_x.isnull().sum())
print(z.zonecentroid_y.isnull().sum())
In [1]:
import numpy as np, pandas as pd, os
from synthicity.utils import misc
from drcog.models import regression_model_estimation, choice_model_estimation, dataset, refiner
# Open the 'drcog.h5' store located in the project data directory.
data_dir = misc.data_dir()
dset = dataset.DRCOGDataset(os.path.join(data_dir, 'drcog.h5'))

# Run the variable library on the dataset.
from drcog.variables import variable_library
variable_library.calculate_variables(dset)

# Per-zone refinement targets; the next cell reads its zone_id,
# annual_hh_shift and annual_emp_shift columns.
zone_refine = pd.read_csv(os.path.join(data_dir, 'zone_demand_refine.csv'))
In [5]:
# For every zone in `zone_refine` with a positive annual employment shift but
# no parcel in `dset.parcels`, build (and print) a one-acre placeholder parcel
# located at the zone centroid.
# NOTE(review): the new parcel is only printed, never appended to
# dset.parcels, so every placeholder built in this loop gets the same `pid`.

# County name -> county_id for the counties that appear in the zones table.
# A dict lookup fails loudly on an unlisted county, where the former if/elif
# chain silently reused the county_id of the previous zone (or raised
# NameError on the first one).
COUNTY_IDS = {
    'Denver': 8031,
    'Adams': 8001,
    'Arapahoe': 8005,
    'Boulder': 8013,
    'Broomfield': 8014,
    'Clear Creek': 8019,
    'Douglas': 8035,
    'Elbert': 8039,
    'Gilpin': 8047,
    'Jefferson': 8059,
    'Weld': 8123,
}

z = dset.zones
for zone in zone_refine.zone_id.values:
    idx_zone = (zone_refine.zone_id == zone)
    # hh_shift is read but not used in this cell.
    hh_shift = zone_refine.annual_hh_shift[idx_zone].values[0]
    emp_shift = zone_refine.annual_emp_shift[idx_zone].values[0]
    zone_id = zone
    if emp_shift > 0:
        if zone_id not in dset.parcels.zone_id.values:
            print('yoyo')  # debug marker: zone has no parcels

            # First zones row whose index equals zone_id (IndexError if none).
            is_this_zone = (z.index.values == zone_id)
            county = z.county.values[is_this_zone][0]
            x = z.zonecentroid_x.values[is_this_zone][0]
            y = z.zonecentroid_y.values[is_this_zone][0]
            print(x)
            print(y)
            print(type(x))
            print(type(y))

            county_id = COUNTY_IDS[county]  # KeyError on an unlisted county

            # Next free parcel id: one past the current maximum index value.
            pid = dset.parcels.index.values.max() + 1
            newparcel = pd.DataFrame({
                'county_id': [county_id],
                'parcel_sqft': [43560],
                'land_value': [0],
                'zone_id': [zone_id],
                'centroid_x': [x],
                'centroid_y': [y],
                'dist_bus': [6000],
                'dist_rail': [6000],
                'in_ugb': [1],
                'in_uga': [0],
                'prop_constrained': [0.0],
                'acres': [1.0],
            })
            newparcel.index = np.array([pid])
            print(newparcel)
In [6]:
# Display the parcels table.
# NOTE(review): this renders the whole frame; consider .head() or .info() if
# only the column layout is of interest.
dset.parcels
Out[6]:
In [8]:
# Show centroid_x/centroid_y next to x/y for the first rows of the stored
# parcels table.
dset.store.parcels[['centroid_x','x','centroid_y','y']].head()
Out[8]: