In [1]:
from pathlib import Path
import json
from functools import reduce
import math
import datetime as dt
import pytz
from itertools import product
from collections import OrderedDict
import time
import sys
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely.ops as so
import helpers as hp
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [ ]:
# 2001 census area units
path = hp.DATA_DIR/'collected'/'Geographical Table.csv'
f = pd.read_csv(path, dtype={'SAU': str})
f = f.rename(columns={
    'SAU': 'au2001',
    'SAU.Desc': 'au_name',
    'TA': 'territory',
    'Region': 'region',
})
del f['Water']
f.head()
In [ ]:
# Rental area units
path = hp.DATA_DIR/'collected'/'Market Rent Areas.csv'
g = pd.read_csv(path, dtype={'SAU': str})
g = g.rename(columns={
    'SAU': 'au2001',
    'MARKET RENT DESCRIPTION': 'rental_area',
    'TA': 'territory',
    'AU NAME': 'au_name',
})
# Clean rental areas
def clean(x):
    y = x.split(' - ')
    y = y[1] if 'District' not in y[1] else y[0]
    return y
g['rental_area'] = g['rental_area'].map(clean)
f = f.merge(g[['au2001', 'rental_area']])
path = hp.get_path('au2001_csv')
f.to_csv(path, index=False)
f.head()
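In [ ]:
# Minimal sketch of what `clean` does, using hypothetical descriptions
# (not actual rows from the CSV): it keeps the part after ' - ' unless
# that part is a district name, in which case it keeps the part before.
examples = ['Auckland - Ponsonby', 'Ponsonby - Auckland District']
[clean(x) for x in examples]  # expected: ['Ponsonby', 'Ponsonby']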
In [ ]:
# Read Shapefile
path = hp.DATA_DIR/'collected'/'NZ_AU01_region_simplified'/'NZ_AU01_region.shp'
au = gpd.read_file(str(path))
au.crs = hp.CRS_NZGD49
au = au.to_crs(hp.CRS_WGS84)
au = au.rename(columns={'AU01': 'au2001', 'AU_DESC': 'au_name'})
print(au.shape)
print(au.head())
au.head().plot()
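In [ ]:
# Quick sanity check (a sketch): after `to_crs` the layer should be in WGS84,
# so the bounds ought to look like lon/lat values around New Zealand.
print(au.crs)
au.total_bounds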
In [ ]:
# Remove water area units
pattern = r'ocean|strait|inlet|harbour'
cond = au['au_name'].str.contains(pattern, case=False)
au = au[~cond].copy()
print(au.shape)
au.head().plot()
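In [ ]:
# Sketch of a sanity check: no water-related area-unit names should remain
# after the filter above.
assert not au['au_name'].str.contains(pattern, case=False).any()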
In [ ]:
# Merge geodata and metadata, drop null regions, and write to file
f = hp.get_data('au2001_csv')
g = au.merge(f[['au2001', 'territory', 'region', 'rental_area']])
g = g[g['region'].notnull()].copy()
path = hp.get_path('au2001')
with path.open('w') as tgt:
    tgt.write(g.to_json())
g.head()
In [ ]:
# Dissolve area units by rental area
au = hp.get_data('au2001')
ra = au[['rental_area', 'region', 'territory', 'geometry']].dissolve(by='rental_area').reset_index()
path = hp.get_path('rental_areas')
with path.open('w') as tgt:
    tgt.write(ra.to_json())
ra.head()
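In [ ]:
# Sketch of a sanity check: dissolving should leave exactly one row per
# rental area, so `rental_area` ought to be unique in the result.
assert ra['rental_area'].is_unique
ra.geom_type.value_counts()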
In [2]:
ra = hp.get_data('rental_areas')
t = hp.get_data('property_titles')
t.head()
In [ ]:
# Spatial-join titles to rental areas
%time f = gpd.sjoin(t[['geometry', 'fid']], ra, op='intersects')
f.head()
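In [ ]:
# Rough sketch: count how many titles fell into each rental area.
# Titles that intersect no rental area are dropped by the inner spatial join.
f.groupby('rental_area')['fid'].count().sort_values(ascending=False).head()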
In [ ]:
# Choose representative points for rental areas
def pt(group):
    d = {}
    d['geometry'] = so.unary_union(group['geometry']).representative_point()
    d['territory'] = group['territory'].iat[0]
    d['region'] = group['region'].iat[0]
    return pd.Series(d)
g = gpd.GeoDataFrame(f.groupby('rental_area').apply(pt).reset_index())
path = hp.get_path('rental_points')
with path.open('w') as tgt:
    tgt.write(g.to_json())
g.head()
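In [ ]:
# Sketch of a plausibility check, assuming `ra` from the earlier cell is still
# in scope: each representative point should normally fall inside its rental
# area polygon (it is derived from titles that intersect the area, so edge
# cases are possible).
check = g.merge(ra[['rental_area', 'geometry']], on='rental_area', suffixes=('', '_ra'))
check.apply(lambda r: r['geometry'].within(r['geometry_ra']), axis=1).value_counts()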
In [ ]:
ra = hp.get_data('rental_areas')
rap = hp.get_data('rental_points')
for region in hp.REGIONS:
    region_root = hp.DATA_DIR/region
    if not region_root.exists():
        region_root.mkdir()
    region_c = region.capitalize()
    # Rental areas slice
    f = ra[ra['region'] == region_c].copy()
    path = hp.get_path('rental_areas', region)
    with path.open('w') as tgt:
        tgt.write(f.to_json())
    # Rental area points slice
    f = rap[rap['region'] == region_c].copy()
    path = hp.get_path('rental_points', region)
    with path.open('w') as tgt:
        tgt.write(f.to_json())
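In [ ]:
# Sketch: read one region slice back to confirm the GeoJSON round-trips
# and contains only that region.
region = sorted(hp.REGIONS)[0]
check = gpd.read_file(str(hp.get_path('rental_areas', region)))
check['region'].unique()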