In [1]:
from pathlib import Path
import json
from functools import reduce
import math
import datetime as dt
import pytz 
from itertools import product
from collections import OrderedDict
import time
import sys

import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely.ops as so
import plotly as py
import plotly.offline as po
import plotly.graph_objs as go

import helpers as hp

# Switch Plotly into offline notebook mode so that the po.iplot() call used
# later in this notebook can render figures inline.
# NOTE(review): connected=True presumably loads plotly.js from a CDN and so
# needs network access when the notebook is viewed -- confirm.
po.init_notebook_mode(connected=True)
# Load IPython's autoreload extension and reload imported modules (such as
# the local `helpers` module) before each cell executes, so edits to them are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


Plot rents over time


In [2]:
# Fetch the 'rents' dataset through the project's helpers module; every later
# cell in this notebook works from this `rents` frame.
rents = hp.get_data('rents')
# List the distinct quarters present (printed below as 'YYYY-MM-DD' strings).
print(rents['quarter'].unique())
# Preview the first rows; as the cell's last expression it is rendered richly.
rents.head()


['1993-03-01' '1993-06-01' '1993-09-01' '1993-12-01' '1994-03-01'
 '1994-06-01' '1994-09-01' '1994-12-01' '1995-03-01' '1995-06-01'
 '1995-09-01' '1995-12-01' '1996-03-01' '1996-06-01' '1996-09-01'
 '1996-12-01' '1997-03-01' '1997-06-01' '1997-09-01' '1997-12-01'
 '1998-03-01' '1998-06-01' '1998-09-01' '1998-12-01' '1999-03-01'
 '1999-06-01' '1999-09-01' '1999-12-01' '2000-03-01' '2000-06-01'
 '2000-09-01' '2000-12-01' '2001-03-01' '2001-06-01' '2001-09-01'
 '2001-12-01' '2002-03-01' '2002-06-01' '2002-09-01' '2002-12-01'
 '2003-03-01' '2003-06-01' '2003-09-01' '2003-12-01' '2004-03-01'
 '2004-06-01' '2004-09-01' '2004-12-01' '2005-03-01' '2005-06-01'
 '2005-09-01' '2005-12-01' '2006-03-01' '2006-06-01' '2006-09-01'
 '2006-12-01' '2007-03-01' '2007-06-01' '2007-09-01' '2007-12-01'
 '2008-03-01' '2008-06-01' '2008-09-01' '2008-12-01' '2009-03-01'
 '2009-06-01' '2009-09-01' '2009-12-01' '2010-03-01' '2010-06-01'
 '2010-09-01' '2010-12-01' '2011-03-01' '2011-06-01' '2011-09-01'
 '2011-12-01' '2012-03-01' '2012-06-01' '2012-09-01' '2012-12-01'
 '2013-03-01' '2013-06-01' '2013-09-01' '2013-12-01' '2014-03-01'
 '2014-06-01' '2014-09-01' '2014-12-01' '2015-03-01' '2015-06-01'
 '2015-09-01' '2015-12-01' '2016-03-01' '2016-06-01' '2016-09-01'
 '2016-12-01' '2017-03-01' '2017-06-01' '2017-09-01']
Out[2]:
au2001 property_type #bedrooms quarter rent_count rent_mean rent_geo_mean au_name territory region rental_area
0 500100 House 2 1993-03-01 NaN NaN NaN Awanui Far North District Northland Rural Far North
1 500100 House 3 1993-03-01 NaN NaN NaN Awanui Far North District Northland Rural Far North
2 500100 House 2 1993-06-01 NaN NaN NaN Awanui Far North District Northland Rural Far North
3 500100 House 3 1993-06-01 NaN NaN NaN Awanui Far North District Northland Rural Far North
4 500100 House 2 1993-09-01 NaN NaN NaN Awanui Far North District Northland Rural Far North

In [3]:
# What fraction of rental data do we have by #bedrooms?

def hits(group: pd.DataFrame) -> pd.Series:
    """
    Return the fraction of rows in ``group`` whose ``rent_count`` is present.

    Intended for use with ``DataFrameGroupBy.apply``.

    Parameters
    ----------
    group : pd.DataFrame
        Any frame with a ``rent_count`` column; missing values mark rows for
        which no rental data is available.

    Returns
    -------
    pd.Series
        One-element Series indexed by ``'hit_frac'``: the number of non-missing
        ``rent_count`` values divided by the number of rows. NaN when
        ``group`` has no rows, since the fraction is undefined there.
    """
    counts = group['rent_count']
    n_rows = len(counts)
    # Guard the empty case explicitly: dividing by a zero row count would
    # raise ZeroDivisionError instead of reporting "no data".
    hit_frac = counts.count() / n_rows if n_rows else float('nan')
    return pd.Series({'hit_frac': hit_frac})

# Report the Auckland hit fraction per bedroom count at two spatial
# granularities. Each entry pairs the printed heading with the area column
# that the rents are aggregated on.
for label, area_col in (
    ('census area units', 'au2001'),
    ('rental area units', 'rental_area'),
):
    f = hp.aggregate_rents(rents, groupby_cols=(area_col, '#bedrooms'))
    cond = f['region'] == 'Auckland'
    print(label)
    auckland = f[cond].copy()
    print(auckland.groupby('#bedrooms').apply(hits).reset_index())


census area units
  #bedrooms  hit_frac
0         1  0.706250
1         2  0.906977
2         3  0.971510
3         4  0.887608
4        5+  0.371795
rental area units
  #bedrooms  hit_frac
0         1  0.919192
1         2  1.000000
2         3  1.000000
3         4  0.979798
4        5+  0.676768

In [4]:
# Probe by territory: aggregate the rents per (quarter, territory) pair.
# The resulting frame `f` is what the plotting cell below iterates over.
f = hp.aggregate_rents(rents, groupby_cols=('quarter', 'territory'))
# Preview the aggregated frame.
f.head()


Out[4]:
quarter territory region rent_count rent_geo_mean rent_mean
0 1993-03-01 Ashburton District Canterbury 11.0 109.051015 110.090909
1 1993-03-01 Auckland City Auckland 2422.0 178.693647 193.843931
2 1993-03-01 Buller District West Coast 17.0 85.020812 87.882353
3 1993-03-01 Carterton District Wellington 22.0 90.979874 97.045455
4 1993-03-01 Central Hawke'S Bay District Hawke's Bay 10.0 114.000000 115.000000

In [5]:
# Draw one interactive figure per region, containing one line per territory
# that traces the rounded geometric-mean rent across quarters.
for region, region_rents in f.groupby('region'):
    traces = [
        go.Scatter(
            x=territory_rents['quarter'],
            y=territory_rents['rent_geo_mean'].round(),
            name=territory,
        )
        for territory, territory_rents in region_rents.groupby('territory')
    ]
    layout = go.Layout(
        height=600,
        title='{!s} geometric mean rents'.format(region),
        yaxis={'ticksuffix': ' $'},
    )
    fig = go.Figure(data=traces, layout=layout)
    po.iplot(fig)



In [ ]: