In [1]:
import pandas as pd
import numpy as np

hh = pd.read_csv('C:\\urbansim\\data\\travel_model\\2015\\housing_units2015.csv')
print (hh.x<1).sum()
print (hh.y<1).sum()
print (hh.taz05_id<0).sum()

synth_hh = pd.read_csv('C:\\urbansim\\data\\travel_model\\2015\\SynHH2015.csv')
synth_p = pd.read_csv('C:\\urbansim\\data\\travel_model\\2015\\SynPers2015.csv')
print len(np.unique(synth_hh.hhid))
print len(np.unique(synth_p.hhid))
print (np.unique(synth_hh.hhid).values != np.unique(synth_p.hhid).values).sum()

In [1]:
import numpy as np, pandas as pd, os
from synthicity.utils import misc
from drcog.models import regression_model_estimation, choice_model_estimation, dataset
dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(),'drcog.h5'))
np.random.seed(1)

##Variable Library
from drcog.variables import variable_library
variable_library.calculate_variables(dset)

buildings = dset.fetch('buildings')[['building_type_id','improvement_value','land_area','non_residential_sqft','parcel_id','residential_units','sqft_per_unit','stories','tax_exempt','year_built','bldg_sq_ft','unit_price_non_residential','unit_price_residential','building_sqft_per_job','non_residential_units','base_year_jobs','all_units']]
parcels = dset.fetch('parcels')
parcels_urbancen = dset.store.parcels_urbancen.set_index('parcel_id')
parcels['urbancenter_id'] = parcels_urbancen.urban_cen
zones = dset.fetch('zones')
pz = pd.merge(parcels.reset_index(),zones,left_on='zone_id',right_index=True,how='left')
pz = pz.set_index('parcel_id')
bpz = pd.merge(buildings,pz,left_on='parcel_id',right_index=True)
print bpz[['x','y']].describe()

establishments = dset.fetch('establishments')
del establishments['zone_id']
del establishments['county_id']        
##Merge establishments with bulidings/parcels
establishments = pd.merge(establishments,buildings,left_on='building_id',right_index=True)

##Available parcel coordinates (includes random x,y for big parcels)
parcel_coords = dset.parcel_coords
parcel_coords.x = parcel_coords.x.astype('int64')
parcel_coords.y = parcel_coords.y.astype('int64')
big_parcels = parcels.index.values[parcels.parcel_sqft>= 435600]
print parcel_coords[['x','y']].describe()

e = establishments.reset_index()
bids = []
eids = []
hbs = []
sids = []
for idx in e.index:
    for job in range(e.employees[idx]):
        bids.append(e.building_id[idx])
        eids.append(e.index[idx])
        hbs.append(e.home_based_status[idx])
        sids.append(e.sector_id[idx])

jobs = pd.DataFrame({'job_id':range(1,len(bids)+1),'building_id':bids,'establishment_id':eids,'home_based_status':hbs,'sector_id':sids})
jobs['parcel_id'] = bpz.parcel_id[jobs.building_id].values
jobs['urbancenter_id'] = bpz.urbancenter_id[jobs.building_id].values
jobs['x'] = bpz.x[jobs.building_id].values.astype('int64')
jobs['y'] = bpz.y[jobs.building_id].values.astype('int64')
print jobs[['x','y']].describe()
jobs['taz05_id'] = bpz.external_zone_id[jobs.building_id].values
jobs['sector_id_six'] = 1*(jobs.sector_id==61) + 2*(jobs.sector_id==71) + 3*np.in1d(jobs.sector_id,[11,21,22,23,31,32,33,42,48,49]) + 4*np.in1d(jobs.sector_id,[7221,7222,7224]) + 5*np.in1d(jobs.sector_id,[44,45,7211,7212,7213,7223]) + 6*np.in1d(jobs.sector_id,[51,52,53,54,55,56,62,81,92])
jobs['jobtypename'] = ''
jobs.jobtypename[jobs.sector_id_six==1] = 'Education'
jobs.jobtypename[jobs.sector_id_six==2] = 'Entertainment'
jobs.jobtypename[jobs.sector_id_six==3] = 'Production'
jobs.jobtypename[jobs.sector_id_six==4] = 'Restaurant'
jobs.jobtypename[jobs.sector_id_six==5] = 'Retail'
jobs.jobtypename[jobs.sector_id_six==6] = 'Service'
big_parcel_ids_with_jobs = np.unique(jobs.parcel_id[np.in1d(jobs.parcel_id,big_parcels)].values)
for parcel_id in big_parcel_ids_with_jobs:
    idx_jobs_on_parcel = np.in1d(jobs.parcel_id,[parcel_id,])
    coords = parcel_coords[parcel_coords.parcel_id==parcel_id]
    idx_coord = np.random.choice(coords.index,size=idx_jobs_on_parcel.sum(),replace=True)
    x = coords.x.loc[idx_coord].values
    y = coords.y.loc[idx_coord].values
    jobs.x[idx_jobs_on_parcel] = x
    jobs.y[idx_jobs_on_parcel] = y
del jobs['sector_id_six']
del jobs['building_id']
del jobs['establishment_id']
del jobs['home_based_status']
del jobs['sector_id']
jobs.rename(columns={'job_id':'tempid'},inplace=True)


Fetching parcels
Fetching modify_table
Fetching buildings
Fetching establishments
Fetching modify_table
Fetching modify_table
Fetching households_for_estimation
Fetching modify_table
Fetching households
Fetching modify_table
Fetching zones
Fetching modify_table
Fetching travel_data
Fetching modify_table
                    x               y
count   881750.000000   881750.000000
mean   3144436.201901  1701875.798045
std      49418.503640    70245.735035
min    2887889.000000  1472411.000000
25%    3114606.000000  1654203.000000
50%    3144059.500000  1692533.000000
75%    3174067.750000  1748442.000000
max    3503278.000000  1885392.000000
Fetching parcel_coords
Fetching modify_table
                    x               y
count  3162512.000000  3162512.000000
mean   3182831.423264  1705442.081687
std     107253.625533    98444.320674
min    2881134.000000  1471931.000000
25%    3110470.000000  1639741.000000
50%    3170913.000000  1705134.000000
75%    3250269.000000  1783339.000000
max    3505283.000000  1891260.000000
                    x               y
count  1605001.000000  1605001.000000
mean   3143024.325172  1698605.975305
std      40269.686028    56584.968178
min    2921499.000000  1473259.000000
25%    3117541.000000  1662086.000000
50%    3144090.000000  1694108.000000
75%    3170342.000000  1721673.000000
max    3501703.000000  1885037.000000

In [2]:
jobs[['x','y']].describe()


Out[2]:
x y
count 1605001.000000 1605001.000000
mean 3143025.737672 1698607.378752
std 40270.787520 56588.927076
min 2921499.000000 1473259.000000
25% 3117545.000000 1662086.000000
50% 3144086.000000 1694122.000000
75% 3170338.000000 1721673.000000
max 3503917.000000 1885037.000000

In [1]:
import numpy as np, pandas as pd, os
from synthicity.utils import misc
from drcog.models import regression_model_estimation, choice_model_estimation, dataset, refiner
dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(),'drcog.h5'))
from drcog.variables import variable_library
variable_library.calculate_variables(dset)

sim_year = 2013
refiner.run(dset, sim_year)


Fetching parcels
Fetching modify_table
Fetching buildings
Fetching establishments
Fetching modify_table
Fetching modify_table
Fetching households_for_estimation
Fetching modify_table
Fetching households
Fetching modify_table
Fetching zones
Fetching modify_table
Fetching travel_data
Fetching modify_table
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents
No buildings in specified zone.
Constructing small structure to place agents

In [5]:
dset.parcels.x.isnull().sum()


Out[5]:
2

In [7]:
dset.parcels[['x','y']].describe()


Out[7]:
x y
count 949484.000000 949484.000000
mean 3145177.076667 1700240.999813
std 55332.772747 71084.681992
min 2887889.000000 1472248.000000
25% 3114749.000000 1652022.000000
50% 3145977.000000 1692499.000000
75% 3179069.000000 1748392.000000
max 3503749.000000 1886418.000000

In [8]:
z = dset.zones

In [12]:
print z.zonecentroid_x.describe()
print z.zonecentroid_y.describe()
print z.zonecentroid_x.isnull().sum()
print z.zonecentroid_y.isnull().sum()


count       2804.000000
mean     3146421.865906
std        48216.242973
min      2927350.000000
25%      3115305.000000
50%      3143710.000000
75%      3177725.000000
max      3414770.000000
dtype: float64
count       2804.000000
mean     1706572.690442
std        69223.715800
min      1478860.000000
25%      1664557.500000
50%      1698365.000000
75%      1756882.500000
max      1881480.000000
dtype: float64
0
0

In [1]:
import numpy as np, pandas as pd, os
from synthicity.utils import misc
from drcog.models import regression_model_estimation, choice_model_estimation, dataset, refiner
dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(),'drcog.h5'))
from drcog.variables import variable_library
variable_library.calculate_variables(dset)
zone_refine = pd.read_csv(os.path.join(misc.data_dir(),'zone_demand_refine.csv'))


Fetching parcels
Fetching modify_table
Fetching buildings
Fetching establishments
Fetching modify_table
Fetching modify_table
Fetching households_for_estimation
Fetching modify_table
Fetching households
Fetching modify_table
Fetching zones
Fetching modify_table
Fetching travel_data
Fetching modify_table

In [5]:
z = dset.zones
for zone in zone_refine.zone_id.values:
    idx_zone = (zone_refine.zone_id==zone)
    hh_shift = zone_refine.annual_hh_shift[idx_zone].values[0]
    emp_shift = zone_refine.annual_emp_shift[idx_zone].values[0]
    zone_id = zone
    if emp_shift > 0:
        if zone_id not in dset.parcels.zone_id.values:
            print 'yoyo'
            county = z.county.values[z.index.values==zone_id][0]
            x = z.zonecentroid_x.values[z.index.values==zone_id][0]
            y = z.zonecentroid_y.values[z.index.values==zone_id][0]
            print x
            print y
            print type(x)
            print type(y)
            
            if county == 'Denver':
                county_id = 8031
            elif county == 'Adams':
                county_id = 8001
            elif county == 'Arapahoe':
                county_id = 8005
            elif county == 'Boulder':
                county_id = 8013
            elif county == 'Broomfield':
                county_id = 8014
            elif county == 'Clear Creek':
                county_id = 8019
            elif county == 'Douglas':
                county_id = 8035
            elif county == 'Elbert':
                county_id = 8039
            elif county == 'Gilpin':
                county_id = 8047
            elif county == 'Jefferson':
                county_id = 8059
            elif county == 'Weld':
                county_id = 8123
            pid = dset.parcels.index.values.max()+1
            newparcel = pd.DataFrame({'county_id':[county_id],'parcel_sqft':[43560],'land_value':[0],'zone_id':[zone_id],
                                         'centroid_x':[x],'centroid_y':[y],'dist_bus':[6000],'dist_rail':[6000],'in_ugb':[1],'in_uga':[0],
                                         'prop_constrained':[0.0],'acres':[1.0] })
            newparcel.index = np.array([pid])
            print newparcel


yoyo
3.14108e+06
1.69671e+06
<type 'numpy.float32'>
<type 'numpy.float32'>
         acres  centroid_x  centroid_y  county_id  dist_bus  dist_rail  \
1128936      1     3141080     1696710       8031      6000       6000   

         in_uga  in_ugb  land_value  parcel_sqft  prop_constrained  zone_id  
1128936       0       1           0        43560                 0     1822  
yoyo
3.1416e+06
1.69822e+06
<type 'numpy.float32'>
<type 'numpy.float32'>
         acres  centroid_x  centroid_y  county_id  dist_bus  dist_rail  \
1128936      1     3141600     1698220       8031      6000       6000   

         in_uga  in_ugb  land_value  parcel_sqft  prop_constrained  zone_id  
1128936       0       1           0        43560                 0     1845  

In [6]:
dset.parcels


Out[6]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 949484 entries, 434515 to 996465
Data columns (total 34 columns):
county_id                       949484  non-null values
parcel_sqft                     949484  non-null values
land_value                      949484  non-null values
zone_id                         949484  non-null values
lu_type_id                      949484  non-null values
centroid_x                      949484  non-null values
centroid_y                      949484  non-null values
tax_exempt_flag                 949484  non-null values
school_district                 949484  non-null values
zoning_id                       949484  non-null values
x                               949484  non-null values
y                               949484  non-null values
dist_bus                        949484  non-null values
dist_rail                       949484  non-null values
in_ugb                          949484  non-null values
in_uga                          949484  non-null values
env_constr_park                 949484  non-null values
env_constr_lake                 949484  non-null values
env_constr_floodplain           949484  non-null values
env_constr_river                949484  non-null values
env_constr_landslide            949484  non-null values
far_id                          949376  non-null values
prop_constrained                949484  non-null values
in_denver                       949484  non-null values
ln_dist_rail                    949484  non-null values
ln_dist_bus                     949484  non-null values
ln_land_value                   949484  non-null values
land_value_per_sqft             949484  non-null values
rail_within_mile                949484  non-null values
cherry_creek_school_district    949484  non-null values
acres                           949484  non-null values
ln_acres                        949484  non-null values
nonres_far                      798789  non-null values
ln_units_per_acre               798789  non-null values
dtypes: float64(17), int32(3), int64(14)

In [8]:
dset.store.parcels[['centroid_x','x','centroid_y','y']].head()


Out[8]:
centroid_x x centroid_y y
0 3078318 3078324 1219160 1765449
1 3086266 3086267 1694794 1694795
2 3117033 3117034 1676494 1676495
3 3206387 3206387 1696114 1696115
4 3137129 3137129 1745327 1745328