In [1]:
q1 = "select parcel_id, county_id, parcel_sqft, land_value, zone_id, lu_type_id, centroid_x, centroid_y, tax_exempt_flag, school_district, zoning_id from parcels"
q2 = "select parcel_id, dist_bus, dist_rail, in_ugb, in_uga, env_constr_park, env_constr_lake, env_constr_floodplain, env_constr_river, env_constr_landslide, far_id from parcels_spatial"

In [2]:
import psycopg2
import pandas.io.sql as sql
conn_string = "host='localhost' port=5432 dbname='drcog' user='postgres' password='postgres'"
conn = psycopg2.connect(conn_string)
cur = conn.cursor()
parcels1 = sql.read_frame(q1,conn)
parcels2 = sql.read_frame(q2,conn)
buildings = sql.read_frame('select * from buildings',conn)

In [3]:
parcels1


Out[3]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 949484 entries, 0 to 949483
Data columns (total 11 columns):
parcel_id          949484  non-null values
county_id          949484  non-null values
parcel_sqft        949484  non-null values
land_value         949484  non-null values
zone_id            949484  non-null values
lu_type_id         949484  non-null values
centroid_x         949484  non-null values
centroid_y         949484  non-null values
tax_exempt_flag    949484  non-null values
school_district    949484  non-null values
zoning_id          949484  non-null values
dtypes: int64(11)

In [4]:
parcels2


Out[4]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 950968 entries, 0 to 950967
Data columns (total 11 columns):
parcel_id                950968  non-null values
dist_bus                 950968  non-null values
dist_rail                950968  non-null values
in_ugb                   950968  non-null values
in_uga                   950968  non-null values
env_constr_park          950968  non-null values
env_constr_lake          950968  non-null values
env_constr_floodplain    950968  non-null values
env_constr_river         950968  non-null values
env_constr_landslide     950968  non-null values
far_id                   949376  non-null values
dtypes: float64(6), int64(5)

In [5]:
import pandas as pd
parcels = pd.merge(parcels1,parcels2,left_on='parcel_id',right_on='parcel_id')

In [6]:
parcels.far_id = parcels.far_id.fillna(0)

In [7]:
parcels['prop_constrained'] = parcels.env_constr_park + parcels.env_constr_lake + parcels.env_constr_floodplain + parcels.env_constr_river + parcels.env_constr_landslide

In [8]:
parcels.prop_constrained[parcels.prop_constrained>1.0] = 1.0

In [9]:
import numpy as np
np.unique(parcels.zone_id)


Out[9]:
88240      1
88595      2
88727      3
93445      4
94051      5
93046      6
78418      7
149567     8
79607      9
100882    10
99333     11
98762     12
106139    13
89451     14
106657    15
...
696695    2790
672868    2791
694409    2792
696230    2793
662713    2794
685050    2795
686915    2796
663273    2797
674536    2798
664698    2799
690652    2800
678826    2801
678621    2802
669819    2803
680824    2804
Name: zone_id, Length: 2797, dtype: int64

In [10]:
# store = pd.HDFStore('c:\\urbansim\\data\\drcog.h5')
# store['parcels'] = parcels
# store.close()

In [11]:
buildings


Out[11]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 881751 entries, 0 to 881750
Data columns (total 14 columns):
building_id             881751  non-null values
building_type_id        881751  non-null values
improvement_value       881751  non-null values
land_area               881751  non-null values
non_residential_sqft    881751  non-null values
parcel_id               881751  non-null values
residential_units       881751  non-null values
sqft_per_unit           881751  non-null values
stories                 881751  non-null values
tax_exempt              881751  non-null values
year_built              881751  non-null values
building_quality_id     881751  non-null values
srcparc_id              881751  non-null values
bldg_sq_ft              881751  non-null values
dtypes: int64(13), object(1)

In [12]:
parcels


Out[12]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 949484 entries, 0 to 949483
Data columns (total 22 columns):
parcel_id                949484  non-null values
county_id                949484  non-null values
parcel_sqft              949484  non-null values
land_value               949484  non-null values
zone_id                  949484  non-null values
lu_type_id               949484  non-null values
centroid_x               949484  non-null values
centroid_y               949484  non-null values
tax_exempt_flag          949484  non-null values
school_district          949484  non-null values
zoning_id                949484  non-null values
dist_bus                 949484  non-null values
dist_rail                949484  non-null values
in_ugb                   949484  non-null values
in_uga                   949484  non-null values
env_constr_park          949484  non-null values
env_constr_lake          949484  non-null values
env_constr_floodplain    949484  non-null values
env_constr_river         949484  non-null values
env_constr_landslide     949484  non-null values
far_id                   949484  non-null values
prop_constrained         949484  non-null values
dtypes: float64(7), int64(15)

In [13]:
bp = pd.merge(buildings,parcels,left_on='parcel_id',right_on='parcel_id')

In [14]:
bp


Out[14]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 881750 entries, 0 to 881749
Data columns (total 35 columns):
building_id              881750  non-null values
building_type_id         881750  non-null values
improvement_value        881750  non-null values
land_area                881750  non-null values
non_residential_sqft     881750  non-null values
parcel_id                881750  non-null values
residential_units        881750  non-null values
sqft_per_unit            881750  non-null values
stories                  881750  non-null values
tax_exempt               881750  non-null values
year_built               881750  non-null values
building_quality_id      881750  non-null values
srcparc_id               881750  non-null values
bldg_sq_ft               881750  non-null values
county_id                881750  non-null values
parcel_sqft              881750  non-null values
land_value               881750  non-null values
zone_id                  881750  non-null values
lu_type_id               881750  non-null values
centroid_x               881750  non-null values
centroid_y               881750  non-null values
tax_exempt_flag          881750  non-null values
school_district          881750  non-null values
zoning_id                881750  non-null values
dist_bus                 881750  non-null values
dist_rail                881750  non-null values
in_ugb                   881750  non-null values
in_uga                   881750  non-null values
env_constr_park          881750  non-null values
env_constr_lake          881750  non-null values
env_constr_floodplain    881750  non-null values
env_constr_river         881750  non-null values
env_constr_landslide     881750  non-null values
far_id                   881750  non-null values
prop_constrained         881750  non-null values
dtypes: float64(7), int64(27), object(1)