In [1]:
from pathlib import Path
import json
from functools import reduce
import math
import datetime as dt
import pytz 
from itertools import product
from collections import OrderedDict
import time
import sys

import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely.ops as so

import helpers as hp

%load_ext autoreload
%autoreload 2
%matplotlib inline

Prepare table of 2001 area units and rental area units


In [ ]:
# 2001 census area units
path = hp.DATA_DIR/'collected'/'Geographical Table.csv'
f = pd.read_csv(path, dtype={'SAU': str})
f = f.rename(columns={
    'SAU': 'au2001', 
    'SAU.Desc': 'au_name', 
    'TA': 'territory',
    'Region': 'region',
})
del f['Water']
f.head()

# rental area units
path = hp.DATA_DIR/'collected'/'Market Rent Areas.csv'
g = pd.read_csv(path, dtype={'SAU': str})
g = g.rename(columns={
    'SAU': 'au2001', 
    'MARKET RENT DESCRIPTION': 'rental_area',
    'TA': 'territory',
    'AU NAME': 'au_name',
})

# Clean rental areas
def clean(x):
    y = x.split(' - ')
    y = y[1] if 'District' not in y[1] else y[0]
    return y

g['rental_area'] = g['rental_area'].map(clean)


f = f.merge(g[['au2001', 'rental_area']])

path = hp.get_path('au2001_csv')
f.to_csv(path, index=False)
f.head()

Process area units and rental areas into GeoJSON


In [ ]:
# Read Shapefile

path = hp.DATA_DIR/'collected'/'NZ_AU01_region_simplified'/'NZ_AU01_region.shp'
au = gpd.read_file(str(path))
au.crs = hp.CRS_NZGD49
au = au.to_crs(hp.CRS_WGS84)
au = au.rename(columns={'AU01': 'au2001', 'AU_DESC': 'au_name'})
print(au.shape)
print(au.head())
au.head().plot()

In [ ]:
# Remove water area units

pattern = r'ocean|strait|inlet|harbour'
cond = au['au_name'].str.contains(pattern, case=False)
au = au[~cond].copy()
print(au.shape)
au.head().plot()

In [ ]:
# Merge geodata and metadata, drop null regions, and write to file

f = hp.get_data('au2001_csv')

g = au.merge(f[['au2001', 'territory', 'region', 'rental_area']])
g = g[g['region'].notnull()].copy()

path = hp.get_path('au2001')
with path.open('w') as tgt:
    tgt.write(g.to_json())

g.head()

Create geodata for rental areas


In [ ]:
# Dissolve area units by area unit group

au = get_data('au2001')
ra = au[['rental_area', 'region', 'territory', 'geometry']].dissolve(by='rental_area').reset_index()

path = hp.get_path('rental_areas')
with path.open('w') as tgt:
    tgt.write(ra.to_json())

ra.head()

Choose representative points for rental areas using approximate centroids of property titles


In [2]:
ra = hp.get_data('rental_areas')
t = hp.get_data('property_titles')
t.head()


Out[2]:
LGD_ID OWNERS PAR_ID TTL_TITLE geometry
0 21149591 Yeung J 6683994 122991 POINT (174.9064763832665 -36.95116400076868)
1 3410291 Vajsakovic D 5154438 NA11A/102 POINT (174.6539389831913 -36.82909730112425)
2 3414488 Grbic I L:Lowe J D 4826167 NA32C/33 POINT (173.3733926499484 -34.87991641687911)
3 3421826 Kim K 5013176 NA459/84 POINT (174.7440597997697 -36.79408678372536)
4 3438504 Skeen R 5065364 NA22A/1323 POINT (174.6284382829138 -36.85095825040765)

In [ ]:
# Spatial-join titles to rental areas

%time f = gpd.sjoin(t[['geometry', 'fid']], ra, op='intersects')
f.head()

In [ ]:
# Choose representative points for rental areas

def pt(group):
    d = {}
    d['geometry'] = so.unary_union(group['geometry']).representative_point()
    d['territory'] = group['territory'].iat[0]
    d['region'] = group['region'].iat[0]
    return pd.Series(d)

g = gpd.GeoDataFrame(f.groupby('rental_area').apply(pt).reset_index())

path = hp.get_path('rental_points')
with path.open('w') as tgt:
    tgt.write(g.to_json())

g.head()

Prepare regional slices of data


In [ ]:
ra = hp.get_data('rental_areas')
rap = hp.get_data('rental_points')

for region in hp.REGIONS:
    region_root = hp.DATA_DIR/region
    if not region_root.exists():
        region_root.mkdir()
        
    region_c = region.capitalize()

    # Rental areas slice
    f = ra[ra['region'] == region_c].copy()
    path = hp.get_path('rental_areas', region)
    with path.open('w') as tgt:
        tgt.write(f.to_json())
        
    # Rental area points slice
    f = rap[rap['region'] == region_c].copy()
    path = hp.get_path('rental_points', region)
    with path.open('w') as tgt:
        tgt.write(f.to_json())

In [ ]: