Assign Suburbs to POIs

Use the SA2 level from the ABS to find suburbs. This uses a file downloaded from the ABS: the ZIP link called "Statistical Area Level 2 (SA2) ASGS Ed 2011 Digital Boundaries in ESRI Shapefile Format". The files used below are obtained by unzipping that file.

The ABS has a habit of changing its links, so the link referred to above is likely to be broken.


In [1]:
from osgeo import ogr, osr
import pandas as pd
import numpy as np
import collections

Extracting SA2 region names


In [2]:
# Directory holding the unzipped ABS shapefile and the POI CSV files.
data_dir = '../data/'
driver = ogr.GetDriverByName('ESRI Shapefile')

# Open the SA2 boundary shapefile read-only (second argument 0).
shp_path = data_dir + 'SA2_2011_AUST.shp'
node_source = driver.Open(shp_path, 0)
if node_source is None:
    # When OGR exceptions are not enabled, Open() reports failure by
    # returning None; fail here with a clear message instead of an
    # AttributeError on the next line.
    raise IOError('Unable to open shapefile: ' + shp_path)
layer = node_source.GetLayer()
layer.GetFeatureCount()


Out[2]:
2214

In [3]:
# Rewind the layer and list the attribute field names of its first feature.
layer.ResetReading()
feature = layer.GetNextFeature()
feature.keys()


Out[3]:
['SA2_MAIN11',
 'SA2_5DIG11',
 'SA2_NAME11',
 'SA3_CODE11',
 'SA3_NAME11',
 'SA4_CODE11',
 'SA4_NAME11',
 'GCC_CODE11',
 'GCC_NAME11',
 'STE_CODE11',
 'STE_NAME11',
 'ALBERS_SQM']

In [4]:
# Walk every feature once, keeping the SA2 code and name of the Victorian ones.
sla2, name = [], []
layer.ResetReading()
while True:
    feature = layer.GetNextFeature()
    if not feature:
        # end of the layer
        break
    # Victoria is state number 2
    if feature.GetField('STE_CODE11') != '2':
        continue
    sla2.append(feature.GetField('SA2_MAIN11'))
    name.append(feature.GetField('SA2_NAME11'))
print(len(sla2))


435

In [5]:
# Map each SA2 code to its region name (the two lists are index-aligned).
sla_name_dict = dict(zip(sla2, name))

Find the SA2 region name for each POI, and add it to the POIs


In [6]:
def find_points(locations, sla_name, pt, polygon):
    """
    Label every location that lies within one region polygon.

    locations : the dataframe containing the locations; its 'poiLat' and
        'poiLon' columns are read and its 'suburb' column is updated in place
    sla_name : the name of the SLA region
    pt : a handle that determines coordinates; it is overwritten with each
        row's (longitude, latitude) before the containment test
    polygon : the polygon defining the SLA

    Returns None. Rows with a non-finite latitude or longitude (NaN or
    +/-inf) are skipped. If the containment test raises ValueError, the
    offending coordinates are printed and the remaining rows are NOT tested
    against this polygon.
    """
    for idx, row in locations.iterrows():
        lat, lon = row['poiLat'], row['poiLon']
        # Skip missing or invalid coordinates instead of handing them to the
        # geometry library.
        if not (np.isfinite(lat) and np.isfinite(lon)):
            continue
        pt.SetPoint(0, lon, lat)
        try:
            inside = pt.Within(polygon)
        except ValueError:
            print(lon, lat)
            print('Unable to solve inside polygon')
            # Give up on this polygon (presumably its geometry is unusable).
            return
        if inside:
            locations.loc[idx, 'suburb'] = sla_name

In [7]:
# A single point geometry that is reused for every containment test.
pt = ogr.Geometry(ogr.wkbPoint)

# Tag the point with the WGS84 geographic coordinate system.
spatial_ref = osr.SpatialReference()
spatial_ref.SetWellKnownGeogCS("WGS84")
pt.AssignSpatialReference(spatial_ref)

In [8]:
# Load the POIs and start every one with an empty suburb.
poi = pd.read_csv(data_dir + 'poi-Melb-all.csv')
poi['suburb'] = ''

# Test the POIs against each Victorian region in turn.
num_sla = 1
layer.ResetReading()
while True:
    sla = layer.GetNextFeature()
    if not sla:
        # end of the layer
        break
    # Victoria is state number 2
    if sla.GetField('STE_CODE11') != '2':
        continue

    sla_id = sla.GetField('SA2_MAIN11')
    sla_name = sla_name_dict[sla_id]
    polygon = sla.GetGeometryRef()
    find_points(poi, sla_name, pt, polygon)

    # progress bar: report every 100th Victorian region processed
    if num_sla % 100 == 0:
        print(num_sla)
    num_sla += 1

poi.head()


100
200
300
400
144.96778 -37.821670000000005
Unable to solve inside polygon
144.96778 -37.821670000000005
Unable to solve inside polygon
Out[8]:
poiID poiName poiTheme poiLat poiLon poiURL suburb
0 0 Arts Precinct City precincts -37.82167 144.96778 https://en.wikipedia.org/wiki/Melbourne_Arts_P... Southbank
1 1 Docklands City precincts -37.81700 144.94600 https://en.wikipedia.org/wiki/Docklands,_Victoria Docklands
2 2 Government Precinct City precincts -37.81190 144.97300 https://en.wikipedia.org/wiki/Spring_Street,_M... Melbourne
3 3 Little Italy City precincts -37.79972 144.96694 https://en.wikipedia.org/wiki/Little_Italy,_Me... Carlton
4 4 RMIT City City precincts -37.80778 144.96333 https://en.wikipedia.org/wiki/City_campus_of_t... Melbourne

In [9]:
# Save the POIs, now including the suburb column, without the dataframe index.
out_path = data_dir + 'poi-Melb-all-suburb.csv'
poi.to_csv(out_path, index=False)

Some statistics


In [10]:
# Number of POIs, then the ten most frequent themes and suburbs.
print(len(poi))
c_theme = collections.Counter(poi['poiTheme'])
c_suburb = collections.Counter(poi['suburb'])
for counts in (c_theme, c_suburb):
    print(counts.most_common(10))


88
[('Shopping', 17), ('Sports stadiums', 14), ('Parks and spaces', 14), ('Institutions', 12), ('City precincts', 8), ('Transport', 8), ('Structures', 8), ('Entertainment', 4), ('Public galleries', 3)]
[('Melbourne', 26), ('Southbank', 16), ('East Melbourne', 15), ('Docklands', 6), ('Parkville', 4), ('Carlton', 4), ('Albert Park', 4), ('Beaumaris', 1), ('', 1), ('Carlton North - Princes Hill', 1)]

In [ ]: