In [1]:
from pathlib import Path
import json
from functools import reduce
import math
import datetime as dt
import pytz 
from itertools import product
from collections import OrderedDict
import time
import sys

import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely.ops as so
import plotly as py
import plotly.offline as po
import plotly.graph_objs as go

import helpers as hp

# Enable inline Plotly rendering for this notebook; connected=True asks Plotly
# to load plotly.js from the web rather than embedding it in the notebook file.
po.init_notebook_mode(connected=True)
# Reload imported modules automatically before each cell execution, so edits to
# local modules such as `helpers` are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


Plot rents over time


In [2]:
# Load the rents table, list the distinct quarters it covers, then preview
# the first few rows.
rents = hp.get_data('rents')
quarters = rents['quarter'].unique()
print(quarters)
rents.head()


['1993-03-01' '1993-06-01' '1993-09-01' '1993-12-01' '1994-03-01'
 '1994-06-01' '1994-09-01' '1994-12-01' '1995-03-01' '1995-06-01'
 '1995-09-01' '1995-12-01' '1996-03-01' '1996-06-01' '1996-09-01'
 '1996-12-01' '1997-03-01' '1997-06-01' '1997-09-01' '1997-12-01'
 '1998-03-01' '1998-06-01' '1998-09-01' '1998-12-01' '1999-03-01'
 '1999-06-01' '1999-09-01' '1999-12-01' '2000-03-01' '2000-06-01'
 '2000-09-01' '2000-12-01' '2001-03-01' '2001-06-01' '2001-09-01'
 '2001-12-01' '2002-03-01' '2002-06-01' '2002-09-01' '2002-12-01'
 '2003-03-01' '2003-06-01' '2003-09-01' '2003-12-01' '2004-03-01'
 '2004-06-01' '2004-09-01' '2004-12-01' '2005-03-01' '2005-06-01'
 '2005-09-01' '2005-12-01' '2006-03-01' '2006-06-01' '2006-09-01'
 '2006-12-01' '2007-03-01' '2007-06-01' '2007-09-01' '2007-12-01'
 '2008-03-01' '2008-06-01' '2008-09-01' '2008-12-01' '2009-03-01'
 '2009-06-01' '2009-09-01' '2009-12-01' '2010-03-01' '2010-06-01'
 '2010-09-01' '2010-12-01' '2011-03-01' '2011-06-01' '2011-09-01'
 '2011-12-01' '2012-03-01' '2012-06-01' '2012-09-01' '2012-12-01'
 '2013-03-01' '2013-06-01' '2013-09-01' '2013-12-01' '2014-03-01'
 '2014-06-01' '2014-09-01' '2014-12-01' '2015-03-01' '2015-06-01'
 '2015-09-01' '2015-12-01' '2016-03-01' '2016-06-01' '2016-09-01'
 '2016-12-01' '2017-03-01' '2017-06-01' '2017-09-01' '2017-12-01'
 '2018-03-01' '2018-06-01']
Out[2]:
au2001 property_type #bedrooms quarter rent_count rent_mean rent_geo_mean au_name territory region rental_area
0 500100 House 2 1993-03-01 NaN NaN NaN Awanui Far North District Northland Rural Far North
1 500100 House 3 1993-03-01 NaN NaN NaN Awanui Far North District Northland Rural Far North
2 500100 House 2 1993-06-01 NaN NaN NaN Awanui Far North District Northland Rural Far North
3 500100 House 3 1993-06-01 NaN NaN NaN Awanui Far North District Northland Rural Far North
4 500100 House 2 1993-09-01 NaN NaN NaN Awanui Far North District Northland Rural Far North

In [3]:
# What fraction of rental data do we have by #bedrooms?

def hits(group):
    """
    Return the fraction of rows in ``group`` whose ``rent_count`` is not null.

    Parameters
    ----------
    group : pandas.DataFrame
        Any frame with a ``'rent_count'`` column; in this notebook it is one
        group produced by ``DataFrame.groupby(...).apply``.

    Returns
    -------
    pandas.Series
        A one-item Series with key ``'hit_frac'``. The value lies in [0, 1],
        or is NaN when ``group`` has no rows.

    Raises
    ------
    KeyError
        If ``group`` has no ``'rent_count'`` column.
    """
    counts = group['rent_count']
    n_rows = len(counts)
    # A frame with no rows has no meaningful fraction; report NaN rather than
    # failing with ZeroDivisionError.
    hit_frac = counts.notna().sum() / n_rows if n_rows else float('nan')
    return pd.Series({'hit_frac': hit_frac})

# Run the same probe at two spatial granularities: aggregate rents by
# (area column, #bedrooms), keep the Auckland rows, and print the share of
# non-null rent counts for each bedroom category.
# NOTE(review): in the recorded run every hit_frac is 1.0, i.e. no null
# rent_count is left among the aggregated Auckland rows -- confirm whether
# this probe should look at the aggregated frame or at the raw `rents`.
for label, area_col in (
    ('census area units', 'au2001'),
    ('rental area units', 'rental_area'),
):
    f = hp.aggregate_rents(rents, groupby_cols=(area_col, '#bedrooms'))
    cond = f['region'] == 'Auckland'
    print(label)
    by_bedrooms = f.loc[cond].groupby('#bedrooms').apply(hits)
    print(by_bedrooms.reset_index())


/home/araichev/affordability_nz/py/helpers.py:190: RuntimeWarning:

invalid value encountered in double_scalars

census area units
  #bedrooms  hit_frac
0         1       1.0
1         2       1.0
2         3       1.0
3         4       1.0
4        5+       1.0
rental area units
  #bedrooms  hit_frac
0         1       1.0
1         2       1.0
2         3       1.0
3         4       1.0
4        5+       1.0

In [4]:
# Aggregate rents per (quarter, territory) and preview the result.
# `f` is read by the plotting cell that follows.
territory_cols = ('quarter', 'territory')
f = hp.aggregate_rents(rents, groupby_cols=territory_cols)
f.head(5)


/home/araichev/affordability_nz/py/helpers.py:190: RuntimeWarning:

invalid value encountered in double_scalars

Out[4]:
quarter territory region rent_count rent_geo_mean rent_mean
0 1993-03-01 Ashburton District Canterbury 11.0 109.051015 110.090909
1 1993-03-01 Auckland City Auckland 2423.0 178.667543 193.790343
2 1993-03-01 Buller District West Coast 18.0 85.848923 88.666667
3 1993-03-01 Carterton District Wellington 22.0 90.979874 97.045455
4 1993-03-01 Central Hawke'S Bay District Hawke's Bay 10.0 114.000000 115.000000

In [5]:
# Draw one figure per region, with one line per territory showing the
# rounded geometric mean rent for each quarter.
for region, region_rents in f.groupby('region'):
    traces = [
        go.Scatter(
            x=territory_rents['quarter'],
            y=territory_rents['rent_geo_mean'].round(),
            name=territory,
        )
        for territory, territory_rents in region_rents.groupby('territory')
    ]

    layout = go.Layout(
        height=600,
        title='{!s} geometric mean rents'.format(region),
        # The suffix is appended to each y tick label, e.g. "200 $".
        yaxis={'ticksuffix': ' $'},
    )
    fig = go.Figure(data=traces, layout=layout)
    po.iplot(fig)



In [ ]: