In [ ]:
import numpy as np, pandas as pd, os
from synthicity.utils import misc
from drcog.models import dataset####in the src dir, not examples, to mesh with opus
# Open the DRCOG HDF5 store found in the directory reported by misc.data_dir().
dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(), 'drcog.h5'))

# Fetch the buildings table and keep only the attributes listed here.
buildings = dset.fetch('buildings')[[
    'building_type_id',
    'improvement_value',
    'land_area',
    'non_residential_sqft',
    'parcel_id',
    'residential_units',
    'sqft_per_unit',
    'stories',
    'tax_exempt',
    'year_built',
    'bldg_sq_ft',
    'unit_price_non_residential',
    'unit_price_residential',
    'building_sqft_per_job',
    'non_residential_units',
    'base_year_jobs',
    'all_units',
]]

# Fetch the remaining tables unchanged.
establishments = dset.fetch('establishments')
households = dset.fetch('households')
parcels = dset.fetch('parcels')
zones = dset.fetch('zones')

# Attach parcel attributes to every building: buildings.parcel_id is matched
# against the parcels index.  pd.merge defaults to an inner join, so rows
# without a matching key are dropped.
buildings = pd.merge(
    left=buildings,
    right=parcels,
    left_on='parcel_id',
    right_index=True,
)

# Attach the combined building/parcel attributes to each household by
# matching households.building_id against the buildings index.
households = pd.merge(
    left=households,
    right=buildings,
    left_on='building_id',
    right_index=True,
)

# Attach the combined building/parcel attributes to each establishment by
# matching establishments.building_id against the buildings index.
establishments = pd.merge(
    left=establishments,
    right=buildings,
    left_on='building_id',
    right_index=True,
)
In [ ]:
# Display the `employees` column of the establishments table.
establishments['employees']
In [ ]:
# Sum of the `employees` column over every establishment row.
establishments['employees'].sum()
In [ ]:
# Summary statistics (as produced by Series.describe) for the `employees` column.
establishments['employees'].describe()
In [ ]:
# Sum of `employees` within each zone_id.
establishments.groupby('zone_id')['employees'].sum()
In [ ]:
# Sum of `employees` within each county_id.
establishments.groupby('county_id')['employees'].sum()
In [ ]:
# Sum of `employees` within each city_id.
establishments.groupby('city_id')['employees'].sum()
In [ ]:
# Show only the establishment rows whose sector_id equals 33.
establishments[establishments['sector_id'] == 33]
In [ ]:
# Sum of `employees` per county_id, restricted to rows with sector_id == 33.
establishments[
    establishments['sector_id'] == 33
].groupby('county_id')['employees'].sum()
In [ ]:
# Sum of `employees` per county_id, restricted to rows with dist_rail < 2600
# (units of dist_rail are not shown here -- TODO confirm).
establishments[
    establishments['dist_rail'] < 2600
].groupby('county_id')['employees'].sum()
In [ ]:
# Sum of `employees` per county_id for rows that satisfy BOTH conditions:
# dist_rail < 2600 and sector_id == 54.
# The two boolean masks are combined with `&` (element-wise logical AND),
# the idiomatic pandas operator, instead of arithmetic multiplication.
establishments[
    (establishments.dist_rail < 2600) & (establishments.sector_id == 54)
].groupby('county_id').employees.sum()
In [ ]:
# Keep only the establishment rows whose home_based_status is 0.
non_home_based_establishments = establishments[
    establishments['home_based_status'] == 0
]
In [ ]:
# Per-city total, mean and median of `employees` among the non-home-based
# establishments.  The grouped column is built once and reused for all three
# aggregations.
# print(...) with one parenthesised argument produces the same output under
# the Python 2 print statement and the Python 3 print function.
employees_by_city = non_home_based_establishments.groupby('city_id').employees
print(employees_by_city.sum())
print(employees_by_city.mean())
print(employees_by_city.median())
In [ ]:
# Employment density per zone: total employees divided by total parcel area.
zonal_employment = establishments.groupby('zone_id').employees.sum()

# parcel_sqft is summed per zone and divided by 43560.0 (square feet in one
# acre) to express the area in acres.
zonal_acreage = parcels.groupby('zone_id').parcel_sqft.sum() / 43560.0

# The division aligns the two Series on zone_id; a zone that appears in only
# one of them yields NaN.
employment_density = zonal_employment / zonal_acreage

# print(...) with one parenthesised argument produces the same output under
# the Python 2 print statement and the Python 3 print function.
print(employment_density)
In [ ]:
# Sum of the `persons` column within each zone_id.
households.groupby('zone_id')['persons'].sum()
In [ ]:
# Number of household rows in each zone_id.
households.groupby(by='zone_id').size()
In [ ]:
# Sum of the `children` column within each county_id.
households.groupby('county_id')['children'].sum()
In [ ]:
# Each county's share of the total `children` count over all households.
# The `* 1.0` forces floating-point division under Python 2.
(
    households.groupby('county_id')['children'].sum() * 1.0
    / households['children'].sum()
)
In [ ]:
# Cars per household in each county: summed `cars` divided by the number of
# household rows.  The `* 1.0` forces floating-point division under Python 2.
(
    households.groupby('county_id')['cars'].sum() * 1.0
    / households.groupby('county_id').size()
)
In [ ]:
# County-level ratio of employees (from establishments) to workers (from
# households).
employment = establishments.groupby('county_id')['employees'].sum()
workers = households.groupby('county_id')['workers'].sum()

# The `* 1.0` forces floating-point division under Python 2.
employment * 1.0 / workers
In [ ]:
# Mean `age_of_head` per county_id among households with race_id == 1.
households[
    households['race_id'] == 1
].groupby(['county_id'])['age_of_head'].mean()
In [ ]:
# Median `income` for every (county_id, race_id) combination.
households.groupby(['county_id', 'race_id'])['income'].median()
In [ ]:
### Per capita income
# For each zone: summed household `income` divided by summed `persons`.
# The zone grouping is built once and reused for both aggregations.
households_by_zone = households.groupby('zone_id')
zonal_income = households_by_zone.income.sum()
zonal_persons = households_by_zone.persons.sum()

# The `* 1.0` forces floating-point division under Python 2.
per_capita_income = zonal_income * 1.0 / zonal_persons

# print(...) with one parenthesised argument produces the same output under
# the Python 2 print statement and the Python 3 print function.
print(per_capita_income)