In [1]:
import numpy as np, pandas as pd, os
from synthicity.utils import misc
from drcog.models import regression_model_estimation, choice_model_estimation, dataset
# Open the dataset stored in drcog.h5 under the project data directory, and
# seed numpy's global RNG so the random sampling further down is repeatable.
dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(), 'drcog.h5'))
np.random.seed(1)

## Variable library
from drcog.variables import variable_library
variable_library.calculate_variables(dset)

## Base tables
building_columns = [
    'building_type_id', 'improvement_value', 'land_area',
    'non_residential_sqft', 'parcel_id', 'residential_units',
    'sqft_per_unit', 'stories', 'tax_exempt', 'year_built', 'bldg_sq_ft',
    'unit_price_non_residential', 'unit_price_residential',
    'building_sqft_per_job', 'non_residential_units', 'base_year_jobs',
    'all_units',
]
buildings = dset.fetch('buildings')[building_columns]

# zone_id / county_id are removed in place from the fetched agent tables
# (presumably so they do not clash with the same-named columns that arrive
# through the buildings/parcels merge below -- TODO confirm).
establishments = dset.fetch('establishments')
for redundant_column in ('zone_id', 'county_id'):
    del establishments[redundant_column]

households = dset.fetch('households')
for redundant_column in ('zone_id', 'county_id'):
    del households[redundant_column]

# Parcels, tagged with the urban-center code read from the HDF store.
parcels = dset.fetch('parcels')
parcels_urbancen = dset.store.parcels_urbancen.set_index('parcel_id')
parcels['urbancenter_id'] = parcels_urbancen['urban_cen']
zones = dset.fetch('zones')

# pz: parcels left-joined to their zone, indexed by parcel_id.
# bpz: buildings joined to pz; keeps the buildings index, so it can be looked
# up by building_id.
pz = (parcels.reset_index()
             .merge(zones, how='left', left_on='zone_id', right_index=True)
             .set_index('parcel_id'))
bpz = buildings.merge(pz, left_on='parcel_id', right_index=True)

## Merge buildings and parcels
buildings = buildings.merge(parcels, left_on='parcel_id', right_index=True)

## Merge households with buildings/parcels
households = households.merge(buildings, left_on='building_id', right_index=True)

## Merge establishments with buildings/parcels
establishments = establishments.merge(buildings, left_on='building_id', right_index=True)

##### Export jobs table
# Build a one-row-per-job table: every establishment contributes `employees`
# rows, and each job is located through the establishment's building.
e = establishments.reset_index()

# Vectorised replacement for the per-job Python loop.  Counts are floored at
# zero so that non-positive employee counts contribute no rows (np.repeat
# raises on a negative count).  `employees` must be integer-typed.
jobs_per_establishment = np.maximum(e['employees'].values, 0)
bids = np.repeat(e['building_id'].values, jobs_per_establishment)
# NOTE(review): e.index is the positional label produced by reset_index(), not
# the establishments' original index.  It is only used for the length check
# below and never reaches the exported table.
eids = np.repeat(e.index.values, jobs_per_establishment)
hbs = np.repeat(e['home_based_status'].values, jobs_per_establishment)
sids = np.repeat(e['sector_id'].values, jobs_per_establishment)
print(len(bids))
print(len(eids))
print(len(hbs))
print(len(sids))

# Only the exported columns are materialised; the sequential job id is
# published under the name 'tempid'.
jobs = pd.DataFrame({'tempid': range(1, len(bids) + 1)})

# Location attributes come from the building/parcel/zone join (bpz), which is
# indexed by building id.  Pairs are (jobs column, bpz column).
for jobs_column, bpz_column in (('parcel_id', 'parcel_id'),
                                ('urbancenter_id', 'urbancenter_id'),
                                ('x', 'centroid_x'),
                                ('y', 'centroid_y'),
                                ('taz05_id', 'external_zone_id')):
    jobs[jobs_column] = bpz[bpz_column].loc[bids].values

# Collapse sector ids into six job types.  The sector lists are disjoint;
# sectors that appear in none of them keep the empty string.
jobtype_sectors = (
    ('Education', [61]),
    ('Entertainment', [71]),
    ('Production', [11, 21, 22, 23, 31, 32, 33, 42, 48, 49]),
    ('Restaurant', [7221, 7222, 7224]),
    ('Retail', [44, 45, 7211, 7212, 7213, 7223]),
    ('Service', [51, 52, 53, 54, 55, 56, 62, 81, 92]),
)
jobs['jobtypename'] = ''
for jobtype_name, sector_ids in jobtype_sectors:
    # .loc assigns on the frame itself; chained `jobs.col[mask] = ...` may
    # write to a temporary copy instead.
    jobs.loc[np.in1d(sids, sector_ids), 'jobtypename'] = jobtype_name

# CSV export is disabled; tm_input_dir and sim_year are not defined in this notebook.
#jobs.to_csv(tm_input_dir+'\\jobs%s.csv'%sim_year,index=False)

##### Export household points
# One row per household, located through its building: bpz is indexed by
# building id, so each attribute is a label lookup on the household's building.
hh = households[['building_id']].reset_index()
hh_building_ids = hh['building_id']

# Pairs are (hh column, bpz column).
for hh_column, bpz_column in (('parcel_id', 'parcel_id'),
                              ('urbancenter_id', 'urbancenter_id'),
                              ('x', 'centroid_x'),
                              ('y', 'centroid_y'),
                              ('taz05_id', 'external_zone_id')):
    hh[hh_column] = bpz[bpz_column][hh_building_ids].values

# Distance to the nearer of rail and bus, divided by 5280
# (presumably a feet-to-miles conversion -- TODO confirm units).
rail_dist = bpz['dist_rail'][hh_building_ids].values
bus_dist = bpz['dist_bus'][hh_building_ids].values
hh['dist_trans'] = np.minimum(rail_dist, bus_dist) / 5280.0


Fetching parcels
Fetching modify_table
Fetching buildings
Fetching establishments
Fetching modify_table
Fetching modify_table
Fetching households_for_estimation
Fetching modify_table
Fetching households
Fetching modify_table
Fetching zones
Fetching modify_table
Fetching travel_data
Fetching modify_table
1605001
1605001
1605001
1605001

In [2]:
# Pull the parcel coordinate table off the dataset; later cells filter it by
# parcel_id and sample its x / y columns.
parcel_coords = dset.parcel_coords


Fetching parcel_coords
Fetching modify_table

In [3]:
# Cast the coordinate columns to int64 (any fractional part is discarded).
# Item assignment is used rather than `parcel_coords.x = ...`: attribute-style
# assignment can silently set a plain attribute instead of the column.
for axis in ('x', 'y'):
    parcel_coords[axis] = parcel_coords[axis].astype('int64')

In [4]:
# Ids of "big" parcels: those of at least 10 acres (1 acre = 43,560 sq ft).
SQFT_PER_ACRE = 43560
BIG_PARCEL_MIN_SQFT = 10 * SQFT_PER_ACRE
is_big_parcel = (parcels['parcel_sqft'] >= BIG_PARCEL_MIN_SQFT).values
big_parcels = parcels.index.values[is_big_parcel]

In [5]:
# Sorted, de-duplicated ids of the big parcels that hold at least one household.
hh_is_on_big_parcel = np.in1d(hh['parcel_id'], big_parcels)
big_parcel_ids_with_hh = np.unique(hh['parcel_id'][hh_is_on_big_parcel].values)

In [6]:
# Number of big parcels holding households (np.unique returns a 1-D array).
big_parcel_ids_with_hh.size


Out[6]:
9869

In [7]:
# Scatter households that sit on big parcels: every household gets a point
# drawn at random (with replacement) from the parcel's rows in parcel_coords,
# replacing the x / y it was given when the hh table was built.
# NOTE(review): only the first N_DEBUG_PARCELS parcels are processed and the x
# values are printed before/after -- this looks like a debugging cap; lift it
# to relocate every household.
N_DEBUG_PARCELS = 20
for parcel_id in big_parcel_ids_with_hh[:N_DEBUG_PARCELS]:
    idx_hh_on_parcel = (hh['parcel_id'] == parcel_id).values
    coords = parcel_coords[parcel_coords['parcel_id'] == parcel_id]
    if len(coords) == 0:
        # No candidate points for this parcel: leave the households where they
        # are instead of letting np.random.choice fail on an empty population.
        continue
    idx_coord = np.random.choice(coords.index, size=idx_hh_on_parcel.sum(), replace=True)
    sampled = coords.loc[idx_coord]
    print(hh.loc[idx_hh_on_parcel, 'x'])
    # .loc writes to hh itself; chained `hh.x[mask] = ...` may hit a copy.
    hh.loc[idx_hh_on_parcel, 'x'] = sampled['x'].values
    hh.loc[idx_hh_on_parcel, 'y'] = sampled['y'].values
    print(hh.loc[idx_hh_on_parcel, 'x'])


251549    3140736
251550    3140736
251551    3140736
251552    3140736
251553    3140736
251554    3140736
251555    3140736
251556    3140736
251557    3140736
251558    3140736
251559    3140736
251560    3140736
251658    3140736
251659    3140736
251660    3140736
...
417802    3140736
428372    3140736
428373    3140736
428374    3140736
428375    3140736
428376    3140736
428377    3140736
619170    3140736
619171    3140736
619172    3140736
619173    3140736
619174    3140736
619175    3140736
619176    3140736
619177    3140736
Name: x, Length: 120, dtype: int64
251549    3140661
251550    3140572
251551    3140424
251552    3140661
251553    3140494
251554    3140494
251555    3141155
251556    3140717
251557    3141179
251558    3140424
251559    3141182
251560    3140692
251658    3140661
251659    3141182
251660    3140692
...
417802    3141155
428372    3140717
428373    3140424
428374    3140572
428375    3140692
428376    3140494
428377    3141155
619170    3140424
619171    3140572
619172    3141182
619173    3140513
619174    3141155
619175    3141182
619176    3140717
619177    3141182
Name: x, Length: 120, dtype: int64
35486    3176919
35487    3176919
35488    3176919
35489    3176919
35490    3176919
35491    3176919
35492    3176919
35493    3176919
35494    3176919
35495    3176919
35496    3176919
35497    3176919
35498    3176919
35499    3176919
35500    3176919
...
427628    3176919
427629    3176919
427630    3176919
427667    3176919
427668    3176919
427669    3176919
427670    3176919
427671    3176919
427672    3176919
427673    3176919
427674    3176919
427675    3176919
427676    3176919
427677    3176919
427678    3176919
Name: x, Length: 128, dtype: int64
35486    3176519
35487    3177400
35488    3176787
35489    3176911
35490    3176519
35491    3176519
35492    3176911
35493    3177064
35494    3177064
35495    3177400
35496    3176519
35497    3176581
35498    3177253
35499    3176582
35500    3176519
...
427628    3177253
427629    3176519
427630    3176519
427667    3176787
427668    3176519
427669    3176787
427670    3176519
427671    3177400
427672    3177400
427673    3176911
427674    3176787
427675    3176519
427676    3177400
427677    3176519
427678    3177064
Name: x, Length: 128, dtype: int64
331516    3156662
331517    3156662
331518    3156662
331519    3156662
331520    3156662
331521    3156662
331522    3156662
331523    3156662
331524    3156662
331525    3156662
331526    3156662
331527    3156662
331528    3156662
331529    3156662
331530    3156662
...
582055    3156662
582056    3156662
582057    3156662
582058    3156662
582059    3156662
582062    3156662
582063    3156662
582064    3156662
582065    3156662
582066    3156662
582067    3156662
582068    3156662
582069    3156662
582070    3156662
582071    3156662
Name: x, Length: 122, dtype: int64
331516    3156610
331517    3156472
331518    3156800
331519    3156610
331520    3156472
331521    3156909
331522    3156919
331523    3156694
331524    3156472
331525    3156748
331526    3156748
331527    3156610
331528    3156848
331529    3156919
331530    3156800
...
582055    3156848
582056    3156610
582057    3156909
582058    3156919
582059    3156472
582062    3156610
582063    3156583
582064    3156848
582065    3156610
582066    3156848
582067    3156583
582068    3156848
582069    3156909
582070    3156729
582071    3156694
Name: x, Length: 122, dtype: int64
525562    3144349
525563    3144349
550817    3144349
550818    3144349
550819    3144349
550820    3144349
550821    3144349
550822    3144349
550823    3144349
580698    3144349
580699    3144349
580700    3144349
580702    3144349
580703    3144349
580704    3144349
...
1002146    3144349
1002147    3144349
1002152    3144349
1002154    3144349
1019527    3144349
1019528    3144349
1019529    3144349
1019530    3144349
1019534    3144349
1052329    3144349
1070096    3144349
1071562    3144349
1078676    3144349
1094690    3144349
1132298    3144349
Name: x, Length: 81, dtype: int64
525562    3144243
525563    3144138
550817    3143855
550818    3144980
550819    3143855
550820    3144210
550821    3144117
550822    3144416
550823    3144210
580698    3144126
580699    3144126
580700    3144126
580702    3144416
580703    3144210
580704    3143855
...
1002146    3144117
1002147    3144138
1002152    3144117
1002154    3144126
1019527    3144126
1019528    3144117
1019529    3144117
1019530    3144210
1019534    3144117
1052329    3144980
1070096    3144850
1071562    3144156
1078676    3144980
1094690    3144156
1132298    3144156
Name: x, Length: 81, dtype: int64
750710    3434175
Name: x, dtype: int64
750710    3434134
Name: x, dtype: int64
863190    3412837
Name: x, dtype: int64
863190    3411674
Name: x, dtype: int64
454085    3424658
454086    3424658
941826    3424658
Name: x, dtype: int64
454085    3423131
454086    3425006
941826    3423446
Name: x, dtype: int64
331142    3404732
331143    3404732
808095    3404732
Name: x, dtype: int64
331142    3405084
331143    3404234
808095    3404294
Name: x, dtype: int64
780033    3394098
Name: x, dtype: int64
780033    3394116
Name: x, dtype: int64
505850    3395036
Name: x, dtype: int64
505850    3396270
Name: x, dtype: int64
460860    3386498
460861    3386498
551795    3386498
551796    3386498
661955    3386498
Name: x, dtype: int64
460860    3386728
460861    3386177
551795    3386213
551796    3386517
661955    3386481
Name: x, dtype: int64
618804    3386935
Name: x, dtype: int64
618804    3386922
Name: x, dtype: int64
699381    3386975
699382    3386975
Name: x, dtype: int64
699381    3386626
699382    3387320
Name: x, dtype: int64
863192    3358228
Name: x, dtype: int64
863192    3358084
Name: x, dtype: int64
581966    3360074
Name: x, dtype: int64
581966    3360584
Name: x, dtype: int64
581970     3329864
581971     3329864
958363     3329864
1052772    3329864
Name: x, dtype: int64
581970     3330354
581971     3330619
958363     3330972
1052772    3330619
Name: x, dtype: int64
454080    3329839
454081    3329839
581968    3329839
581969    3329839
Name: x, dtype: int64
454080    3329459
454081    3329263
581968    3329056
581969    3330348
Name: x, dtype: int64
958366    3338597
Name: x, dtype: int64
958366    3338994
Name: x, dtype: int64
722229    3339087
722230    3339087
Name: x, dtype: int64
722229    3339329
722230    3339229
Name: x, dtype: int64
693653    3340179
Name: x, dtype: int64
693653    3340625
Name: x, dtype: int64

In [8]:
# Sorted, de-duplicated ids of the big parcels that hold at least one job.
job_is_on_big_parcel = np.in1d(jobs['parcel_id'], big_parcels)
big_parcel_ids_with_jobs = np.unique(jobs['parcel_id'][job_is_on_big_parcel].values)
print(len(big_parcel_ids_with_jobs))


3544

In [9]:
# Scatter jobs that sit on big parcels: every job gets a point drawn at random
# (with replacement) from the parcel's rows in parcel_coords, replacing the
# x / y it was given when the jobs table was built.
# NOTE(review): only the first N_DEBUG_PARCELS parcels are processed and the x
# values are printed before/after -- this looks like a debugging cap; lift it
# to relocate every job.
N_DEBUG_PARCELS = 20
for parcel_id in big_parcel_ids_with_jobs[:N_DEBUG_PARCELS]:
    idx_jobs_on_parcel = (jobs['parcel_id'] == parcel_id).values
    coords = parcel_coords[parcel_coords['parcel_id'] == parcel_id]
    if len(coords) == 0:
        # No candidate points for this parcel: leave the jobs where they are
        # instead of letting np.random.choice fail on an empty population.
        continue
    idx_coord = np.random.choice(coords.index, size=idx_jobs_on_parcel.sum(), replace=True)
    sampled = coords.loc[idx_coord]
    print(jobs.loc[idx_jobs_on_parcel, 'x'])
    # .loc writes to jobs itself; chained `jobs.x[mask] = ...` may hit a copy.
    jobs.loc[idx_jobs_on_parcel, 'x'] = sampled['x'].values
    jobs.loc[idx_jobs_on_parcel, 'y'] = sampled['y'].values
    print(jobs.loc[idx_jobs_on_parcel, 'x'])


1446282    3140736
1446418    3140736
1490201    3140736
1490202    3140736
1490203    3140736
1490204    3140736
1490205    3140736
1490206    3140736
1490207    3140736
1490208    3140736
1490209    3140736
1490210    3140736
1490211    3140736
1490212    3140736
1490213    3140736
1490214    3140736
1490215    3140736
Name: x, dtype: int64
1446282    3140513
1446418    3140572
1490201    3140513
1490202    3140494
1490203    3141179
1490204    3140513
1490205    3140494
1490206    3141179
1490207    3140661
1490208    3140424
1490209    3141179
1490210    3140692
1490211    3141179
1490212    3141179
1490213    3141182
1490214    3141182
1490215    3140692
Name: x, dtype: int64
1446419    3176919
1446420    3176919
1446421    3176919
Name: x, dtype: int64
1446419    3177253
1446420    3176911
1446421    3177075
Name: x, dtype: int64
1446525    3156662
1446526    3156662
1446527    3156662
1446528    3156662
1446529    3156662
1446530    3156662
1446531    3156662
1446532    3156662
1446533    3156662
1537615    3156662
1537616    3156662
1537617    3156662
1537618    3156662
Name: x, dtype: int64
1446525    3156800
1446526    3156472
1446527    3156848
1446528    3156919
1446529    3156610
1446530    3156472
1446531    3156472
1446532    3156848
1446533    3156748
1537615    3156848
1537616    3156694
1537617    3156583
1537618    3156472
Name: x, dtype: int64
1446535    3144349
1446536    3144349
1446537    3144349
1446669    3144349
1497121    3144349
1497122    3144349
1497123    3144349
1497124    3144349
1497125    3144349
Name: x, dtype: int64
1446535    3143855
1446536    3144138
1446537    3144156
1446669    3144138
1497121    3144210
1497122    3143855
1497123    3144126
1497124    3144416
1497125    3144850
Name: x, dtype: int64
1534484    3493706
1534485    3493706
Name: x, dtype: int64
1534484    3493115
1534485    3492634
Name: x, dtype: int64
1576357    3470823
Name: x, dtype: int64
1576357    3471376
Name: x, dtype: int64
1578602    3394098
Name: x, dtype: int64
1578602    3393492
Name: x, dtype: int64
1525667    3391577
1525668    3391577
1525669    3391577
1525670    3391577
1525671    3391577
1525672    3391577
Name: x, dtype: int64
1525667    3390326
1525668    3389608
1525669    3389850
1525670    3393261
1525671    3394235
1525672    3390344
Name: x, dtype: int64
1589185    3396940
Name: x, dtype: int64
1589185    3396763
Name: x, dtype: int64
1587032    3387461
Name: x, dtype: int64
1587032    3388011
Name: x, dtype: int64
1599514    3386935
Name: x, dtype: int64
1599514    3386579
Name: x, dtype: int64
1586948    3358228
1586949    3358228
Name: x, dtype: int64
1586948    3358084
1586949    3357946
Name: x, dtype: int64
1529789    3324178
1529790    3324178
1529791    3324178
Name: x, dtype: int64
1529789    3325272
1529790    3323985
1529791    3324033
Name: x, dtype: int64
1582437    3333919
Name: x, dtype: int64
1582437    3333622
Name: x, dtype: int64
121803    3337646
121804    3337646
Name: x, dtype: int64
121803    3338527
121804    3337933
Name: x, dtype: int64
1580955    3331567
Name: x, dtype: int64
1580955    3331784
Name: x, dtype: int64
1538323    3329839
Name: x, dtype: int64
1538323    3329259
Name: x, dtype: int64
1587562    3321966
Name: x, dtype: int64
1587562    3322558
Name: x, dtype: int64
1491187    3340076
1491188    3340076
1491189    3340076
1491190    3340076
1491191    3340076
Name: x, dtype: int64
1491187    3340271
1491188    3340705
1491189    3340082
1491190    3340359
1491191    3341018
Name: x, dtype: int64
121805    3340116
Name: x, dtype: int64
121805    3340579
Name: x, dtype: int64

In [8]:
# Inspect the dtype of the household x coordinate.
hh['x'].dtype


Out[8]:
dtype('float64')

In [9]:
# Inspect the dtype of x in the last per-parcel coordinate subset.
coords['x'].dtype


Out[9]:
dtype('float64')

In [12]:
# Inspect the dtype of the job x coordinate.
jobs['x'].dtype


Out[12]:
dtype('float64')