spandex spatial operations


In [1]:
import os

import spandex
from spandex import spatialtoolz

Reload database from shapefile inputs


In [2]:
# Connect to database if not already connected.
loader = spandex.TableLoader()

# Recreate PostgreSQL sample schema.
with loader.database.cursor() as cur:
    cur.execute("""
        CREATE EXTENSION IF NOT EXISTS postgis;
        DROP SCHEMA IF EXISTS sample CASCADE;
        CREATE SCHEMA sample;
    """)
loader.database.refresh()

# Load all shapefiles in test data directory.
data_path = '../test_data'
for filename in os.listdir(data_path):
    file_root, file_ext = os.path.splitext(filename)
    if file_ext.lower() == '.shp':
        shp_path = os.path.join(data_path, filename)
        table_name = 'sample.' + file_root
        loader.load_shp(filename, table_name)

# Assign shorter variable names for convenience.
t = loader.tables.sample
parcels = t.heather_farms
bg = t.hf_bg
water = t.hf_water

Function examples


In [3]:
# conform_srid
print(spatialtoolz.conform_srids.__doc__)
print("Reprojecting to project SRID: {}".format(loader.srid))
spatialtoolz.conform_srids(loader.srid, schema=t)


    Reproject all non-conforming geometry columns into the specified SRID.

    Parameters
    ----------
    srid : int
        Spatial Reference System Identifier (SRID).
    schema : schema class
        If schema is specified, only SRIDs within the specified schema
        are conformed.

    Returns
    -------
    None

    
Reprojecting to project SRID: 2768

In [4]:
# geom_invalid
print(spatialtoolz.geom_invalid.__doc__)
spatialtoolz.geom_invalid(parcels, index=parcels.parcel_id)


    Return DataFrame with information on records with invalid geometry.

    Returned columns include record identifier, whether geometry is simple,
    and reason for invalidity.

    Parameters
    ----------
    table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class to diagnose.
    index : sqlalchemy.orm.attributes.InstrumentedAttribute, optional
        Column ORM object to use as index.

    Returns
    -------
    df : pandas.DataFrame

    
Out[4]:
simple reason geom
parcel_id

In [5]:
# geom_duplicate
print(spatialtoolz.geom_duplicate.__doc__)
spatialtoolz.geom_duplicate(parcels)


    Return DataFrame with all records that have identical, stacked geometry.

    Parameters
    ----------
    table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class to diagnose.

    Returns
    -------
    df : pandas.DataFrame

    
Out[5]:
gid parcel_id puid county shape_leng shape_area city centroid parcel_acr geom

In [6]:
# geom_overlapping
print(spatialtoolz.geom_overlapping.__doc__)
spatialtoolz.geom_overlapping(parcels, key_name='parcel_id',
                              output_table_name='parcels_overlapping')


    Export overlapping geometries from a table into another table.

    The exported table contains the following columns:
        key_name_a, key_name_b: identifiers of the overlapping pair
        relation: DE-9IM representation of their spatial relation
        geom_a, geom_b: corresponding geometries
        overlap: 2D overlapping region (polygons)

    Parameters
    ----------
    table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class to query for overlapping geometries.
    key_name : str
        Name of column in the queried table containing a unique identifier,
        such as a primary key, to use for cross join and to identify
        geometries in the exported table.
    output_table_name : str
        Name of exported table. Table is created in the same schema as
        the queried table.

    Returns
    -------
    None

    

In [7]:
# geom_unfilled
print(spatialtoolz.geom_unfilled.__doc__)
spatialtoolz.geom_unfilled(parcels, output_table_name='parcels_unfilled')


    Export rows containing interior rings into another table.

    Include the unfilled geometry in the exported table as a new column
    named "unfilled".

    Parameters
    ----------
     table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class to query for rows containing geometries with
        interior rings.
    output_table_name : str
        Name of exported table. Table is created in the same schema as
        the queried table.

    Returns
    -------
    None

    

In [8]:
# calc_area
print(spatialtoolz.calc_area.__doc__)
spatialtoolz.calc_area(parcels)


    Calculate area in units of projection and store value in calc_area column.

    Parameters
    ----------
    table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class with geom column to calculate area for. Value is
        stored in the calc_area column, which is created if it does not exist.

    Returns
    -------
    None

    

In [9]:
# calc_dist
print(spatialtoolz.calc_dist.__doc__)
spatialtoolz.calc_dist(parcels, water.geom)


    Calculate distance between a table of geometries and a geometry column.

    Calculates the minimum Cartesian distance in units of projection between
    each geometry in the table and the nearest point in the geometry column.
    Geometries must have the same projection (SRID).

    Parameters
    ----------
    table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class with geom column to calculate distance from. Value is
        stored in the calc_dist column, which is created if it does not exist.
    geom : sqlalchemy.orm.Query,
           sqlalchemy.orm.attributes.InstrumentedAttribute
        ORM object to calculate distance to, like a column or query.
        Must contain only one column. Rows are aggregated into a MULTI object
        with ST_Collect (faster union that does not dissolve boundaries).

    Returns
    -------
    column : sqlalchemy.orm.attributes.InstrumentedAttribute
        Column containing distances from the table to the geometry column.

    
Out[9]:
<sqlalchemy.orm.attributes.InstrumentedAttribute at 0x7f5695d316d0>

In [10]:
# tag
print(spatialtoolz.tag.__doc__)
spatialtoolz.tag(parcels, 'bg_id', bg, 'objectid')


    Tag target table with attribute of a spatially-related source table.

    Parameters
    ----------
    target_table : sqlalchemy.ext.declarative.DeclarativeMeta
        Target table ORM class to be tagged.
    target_column_name : str
        Name of column in target table to add (if doesn't exist)
        or update (if exists). This where the tag value will be stored.
    source_table : sqlalchemy.ext.declarative.DeclarativeMeta
        Source table ORM class containing information to tag target table.
    source_column_name : str
        Name of column in source table that contains the tagging information.
    how : str, optional
        How to relate the two tables spatially.
        If not specified, defaults to 'point_in_poly'.
        Other spatial relationships are not currently supported.
    df : pandas.DataFrame, optional
        DataFrame to return a tagged copy of.

    Returns
    -------
    None
        However, if df argument is provided, pandas.DataFrame with the
        new or updated column is returned.

    

In [11]:
# proportion_overlap
print(spatialtoolz.proportion_overlap.__doc__)
spatialtoolz.proportion_overlap(parcels, water, 'proportion_water')


    Calculate proportion of target table geometry overlap.

    Calculate proportion of geometry area in each row of target table that
    is overlapped by another table's geometry. Populate specified column in
    target table with proportion overlap value.

    Parameters
    ----------
    target_table : sqlalchemy.ext.declarative.DeclarativeMeta
        Target table ORM class containing geometry to overlap.
    over_table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class containing overlapping geometry.
    column_name : str
        Name of column in target table to add (if doesn't exist) or
        update (if exists). This is where the proportion overlap value
        will be stored.
    df : pandas.DataFrame, optional
        DataFrame to return a copy of with proportion overlap calculation.

    Returns
    -------
    None
        However, if df argument is provided, pandas.DataFrame with the
        new or updated column is returned.

    

In [12]:
# trim
print(spatialtoolz.trim.__doc__)
spatialtoolz.trim(parcels.geom, water.geom)


    Trim target geometry by removing intersection with a trim column.

    Parameters
    ----------
    target_col : sqlalchemy.orm.attributes.InstrumentedAttribute
        Column ORM object to trim.
    trim_col : sqlalchemy.orm.attributes.InstrumentedAttribute
        Column ORM object to trim target column with.

    Returns
    -------
    None

    

Wrap PostGIS


In [13]:
# db_to_df
print(spandex.db_to_df.__doc__)


    Return DataFrame from Query, table, or ORM objects, like columns.

    Parameters
    ----------
    query : sqlalchemy.orm.Query, sqlalchemy.ext.declarative.DeclarativeMeta,
            or iterable
        Query ORM object, table ORM class, or list of ORM objects to query,
        like columns.
    index_name : str, optional
        Name of column to use as DataFrame index. If provided, column
        must be contained in query.

    Returns
    -------
    df : pandas.DataFrame

    

In [14]:
spandex.db_to_df(water, index_col='id')


Out[14]:
gid county cfcc landname landpoly gavprimary geometry_a geometry_l geom
id
66 1 06095 H51 Honker Bay 0 66 0 0 3031303630303030323044303041303030303146303030...

In [15]:
with loader.database.session() as sess:
    q = sess.query(
        parcels.parcel_id, parcels.county,
        parcels.calc_area, parcels.calc_dist
    ).filter(
        parcels.calc_dist < 10
    )
spandex.db_to_df(q, index_col='parcel_id')


Out[15]:
county calc_area calc_dist
parcel_id
1310619 cnc 940.179147 5.916508
1328890 cnc 2502.951388 8.538715
1329689 cnc 1531.733259 2.913542
1361759 cnc 39137.841912 1.766306
1351096 cnc 152237.591686 0.000000
1314915 cnc 1263.207494 0.000000
1314939 cnc 1318.958161 0.000000
1314951 cnc 5152.302568 0.000000
1329426 cnc 1623.131364 0.000000
1329526 cnc 1043.805765 0.000000
1329539 cnc 580.638017 0.000000
1329613 cnc 1968.318081 0.000000
1329720 cnc 1324.626665 0.000000
1329721 cnc 1644.098390 0.000000
1330034 cnc 1236.185722 0.000000
1330525 cnc 17893.177556 0.000000

In [16]:
# db_to_db
print(spandex.db_to_db.__doc__)
spandex.db_to_db(q, name='watery_parcels', schema=t, view=True)


    Create a table or view from Query, table, or ORM objects, like columns.

    Do not use to duplicate a table. The new table will not contain
    indexes or constraints, including primary keys.

    Parameters
    ----------
    query : sqlalchemy.orm.Query, sqlalchemy.ext.declarative.DeclarativeMeta,
            or iterable
        Query ORM object, table ORM class, or list of ORM objects to query,
        like columns.
    name : str
        Name of table or view to create.
    schema : schema class, optional
        Schema of table to create. Defaults to public.
    view : bool, optional
        Whether to create a view instead of a table. Defaults to False.

    Returns
    -------
    None