spandex spatial operations



In [1]:

    
import os

import spandex
from spandex import spatialtoolz

Reload database from shapefile inputs



In [2]:

    
# Connect to database if not already connected.
loader = spandex.TableLoader()

# Recreate PostgreSQL sample schema.
with loader.database.cursor() as cur:
    cur.execute("""
        CREATE EXTENSION IF NOT EXISTS postgis;
        DROP SCHEMA IF EXISTS sample CASCADE;
        CREATE SCHEMA sample;
    """)
loader.database.refresh()

# Load all shapefiles in test data directory.
data_path = '../test_data'
for filename in os.listdir(data_path):
    file_root, file_ext = os.path.splitext(filename)
    if file_ext.lower() == '.shp':
        shp_path = os.path.join(data_path, filename)
        table_name = 'sample.' + file_root
        loader.load_shp(filename, table_name)

# Assign shorter variable names for convenience.
t = loader.tables.sample
parcels = t.heather_farms
bg = t.hf_bg
water = t.hf_water

Function examples



In [3]:

    
# conform_srid
print(spatialtoolz.conform_srids.__doc__)
print("Reprojecting to project SRID: {}".format(loader.srid))
spatialtoolz.conform_srids(loader.srid, schema=t)









    



    Reproject all non-conforming geometry columns into the specified SRID.

    Parameters
    ----------
    srid : int
        Spatial Reference System Identifier (SRID).
    schema : schema class
        If schema is specified, only SRIDs within the specified schema
        are conformed.

    Returns
    -------
    None

    
Reprojecting to project SRID: 2768



In [4]:

    
# geom_invalid
print(spatialtoolz.geom_invalid.__doc__)
spatialtoolz.geom_invalid(parcels, index=parcels.parcel_id)









    



    Return DataFrame with information on records with invalid geometry.

    Returned columns include record identifier, whether geometry is simple,
    and reason for invalidity.

    Parameters
    ----------
    table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class to diagnose.
    index : sqlalchemy.orm.attributes.InstrumentedAttribute, optional
        Column ORM object to use as index.

    Returns
    -------
    df : pandas.DataFrame

    






    Out[4]:






  
    
      
      simple
      reason
      geom
    
    
      parcel_id



In [5]:

    
# geom_duplicate
print(spatialtoolz.geom_duplicate.__doc__)
spatialtoolz.geom_duplicate(parcels)









    



    Return DataFrame with all records that have identical, stacked geometry.

    Parameters
    ----------
    table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class to diagnose.

    Returns
    -------
    df : pandas.DataFrame

    






    Out[5]:






  
    
      
      gid
      parcel_id
      puid
      county
      shape_leng
      shape_area
      city
      centroid
      parcel_acr
      geom



In [6]:

    
# geom_overlapping
print(spatialtoolz.geom_overlapping.__doc__)
spatialtoolz.geom_overlapping(parcels, key_name='parcel_id',
                              output_table_name='parcels_overlapping')









    



    Export overlapping geometries from a table into another table.

    The exported table contains the following columns:
        key_name_a, key_name_b: identifiers of the overlapping pair
        relation: DE-9IM representation of their spatial relation
        geom_a, geom_b: corresponding geometries
        overlap: 2D overlapping region (polygons)

    Parameters
    ----------
    table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class to query for overlapping geometries.
    key_name : str
        Name of column in the queried table containing a unique identifier,
        such as a primary key, to use for cross join and to identify
        geometries in the exported table.
    output_table_name : str
        Name of exported table. Table is created in the same schema as
        the queried table.

    Returns
    -------
    None



In [7]:

    
# geom_unfilled
print(spatialtoolz.geom_unfilled.__doc__)
spatialtoolz.geom_unfilled(parcels, output_table_name='parcels_unfilled')









    



    Export rows containing interior rings into another table.

    Include the unfilled geometry in the exported table as a new column
    named "unfilled".

    Parameters
    ----------
     table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class to query for rows containing geometries with
        interior rings.
    output_table_name : str
        Name of exported table. Table is created in the same schema as
        the queried table.

    Returns
    -------
    None



In [8]:

    
# calc_area
print(spatialtoolz.calc_area.__doc__)
spatialtoolz.calc_area(parcels)









    



    Calculate area in units of projection and store value in calc_area column.

    Parameters
    ----------
    table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class with geom column to calculate area for. Value is
        stored in the calc_area column, which is created if it does not exist.

    Returns
    -------
    None



In [9]:

    
# calc_dist
print(spatialtoolz.calc_dist.__doc__)
spatialtoolz.calc_dist(parcels, water.geom)









    



    Calculate distance between a table of geometries and a geometry column.

    Calculates the minimum Cartesian distance in units of projection between
    each geometry in the table and the nearest point in the geometry column.
    Geometries must have the same projection (SRID).

    Parameters
    ----------
    table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class with geom column to calculate distance from. Value is
        stored in the calc_dist column, which is created if it does not exist.
    geom : sqlalchemy.orm.Query,
           sqlalchemy.orm.attributes.InstrumentedAttribute
        ORM object to calculate distance to, like a column or query.
        Must contain only one column. Rows are aggregated into a MULTI object
        with ST_Collect (faster union that does not dissolve boundaries).

    Returns
    -------
    column : sqlalchemy.orm.attributes.InstrumentedAttribute
        Column containing distances from the table to the geometry column.

    






    Out[9]:





<sqlalchemy.orm.attributes.InstrumentedAttribute at 0x7f5695d316d0>



In [10]:

    
# tag
print(spatialtoolz.tag.__doc__)
spatialtoolz.tag(parcels, 'bg_id', bg, 'objectid')









    



    Tag target table with attribute of a spatially-related source table.

    Parameters
    ----------
    target_table : sqlalchemy.ext.declarative.DeclarativeMeta
        Target table ORM class to be tagged.
    target_column_name : str
        Name of column in target table to add (if doesn't exist)
        or update (if exists). This where the tag value will be stored.
    source_table : sqlalchemy.ext.declarative.DeclarativeMeta
        Source table ORM class containing information to tag target table.
    source_column_name : str
        Name of column in source table that contains the tagging information.
    how : str, optional
        How to relate the two tables spatially.
        If not specified, defaults to 'point_in_poly'.
        Other spatial relationships are not currently supported.
    df : pandas.DataFrame, optional
        DataFrame to return a tagged copy of.

    Returns
    -------
    None
        However, if df argument is provided, pandas.DataFrame with the
        new or updated column is returned.



In [11]:

    
# proportion_overlap
print(spatialtoolz.proportion_overlap.__doc__)
spatialtoolz.proportion_overlap(parcels, water, 'proportion_water')









    



    Calculate proportion of target table geometry overlap.

    Calculate proportion of geometry area in each row of target table that
    is overlapped by another table's geometry. Populate specified column in
    target table with proportion overlap value.

    Parameters
    ----------
    target_table : sqlalchemy.ext.declarative.DeclarativeMeta
        Target table ORM class containing geometry to overlap.
    over_table : sqlalchemy.ext.declarative.DeclarativeMeta
        Table ORM class containing overlapping geometry.
    column_name : str
        Name of column in target table to add (if doesn't exist) or
        update (if exists). This is where the proportion overlap value
        will be stored.
    df : pandas.DataFrame, optional
        DataFrame to return a copy of with proportion overlap calculation.

    Returns
    -------
    None
        However, if df argument is provided, pandas.DataFrame with the
        new or updated column is returned.



In [12]:

    
# trim
print(spatialtoolz.trim.__doc__)
spatialtoolz.trim(parcels.geom, water.geom)









    



    Trim target geometry by removing intersection with a trim column.

    Parameters
    ----------
    target_col : sqlalchemy.orm.attributes.InstrumentedAttribute
        Column ORM object to trim.
    trim_col : sqlalchemy.orm.attributes.InstrumentedAttribute
        Column ORM object to trim target column with.

    Returns
    -------
    None

Wrap PostGIS



In [13]:

    
# db_to_df
print(spandex.db_to_df.__doc__)









    



    Return DataFrame from Query, table, or ORM objects, like columns.

    Parameters
    ----------
    query : sqlalchemy.orm.Query, sqlalchemy.ext.declarative.DeclarativeMeta,
            or iterable
        Query ORM object, table ORM class, or list of ORM objects to query,
        like columns.
    index_name : str, optional
        Name of column to use as DataFrame index. If provided, column
        must be contained in query.

    Returns
    -------
    df : pandas.DataFrame



In [14]:

    
spandex.db_to_df(water, index_col='id')









    Out[14]:






  
    
      
      gid
      county
      cfcc
      landname
      landpoly
      gavprimary
      geometry_a
      geometry_l
      geom
    
    
      id
      
      
      
      
      
      
      
      
      
    
  
  
    
      66
       1
       06095
       H51
       Honker Bay
       0
       66
       0
       0
       3031303630303030323044303041303030303146303030...



In [15]:

    
with loader.database.session() as sess:
    q = sess.query(
        parcels.parcel_id, parcels.county,
        parcels.calc_area, parcels.calc_dist
    ).filter(
        parcels.calc_dist < 10
    )
spandex.db_to_df(q, index_col='parcel_id')









    Out[15]:






  
    
      
      county
      calc_area
      calc_dist
    
    
      parcel_id
      
      
      
    
  
  
    
      1310619
       cnc
          940.179147
       5.916508
    
    
      1328890
       cnc
         2502.951388
       8.538715
    
    
      1329689
       cnc
         1531.733259
       2.913542
    
    
      1361759
       cnc
        39137.841912
       1.766306
    
    
      1351096
       cnc
       152237.591686
       0.000000
    
    
      1314915
       cnc
         1263.207494
       0.000000
    
    
      1314939
       cnc
         1318.958161
       0.000000
    
    
      1314951
       cnc
         5152.302568
       0.000000
    
    
      1329426
       cnc
         1623.131364
       0.000000
    
    
      1329526
       cnc
         1043.805765
       0.000000
    
    
      1329539
       cnc
          580.638017
       0.000000
    
    
      1329613
       cnc
         1968.318081
       0.000000
    
    
      1329720
       cnc
         1324.626665
       0.000000
    
    
      1329721
       cnc
         1644.098390
       0.000000
    
    
      1330034
       cnc
         1236.185722
       0.000000
    
    
      1330525
       cnc
        17893.177556
       0.000000



In [16]:

    
# db_to_db
print(spandex.db_to_db.__doc__)
spandex.db_to_db(q, name='watery_parcels', schema=t, view=True)









    



    Create a table or view from Query, table, or ORM objects, like columns.

    Do not use to duplicate a table. The new table will not contain
    indexes or constraints, including primary keys.

    Parameters
    ----------
    query : sqlalchemy.orm.Query, sqlalchemy.ext.declarative.DeclarativeMeta,
            or iterable
        Query ORM object, table ORM class, or list of ORM objects to query,
        like columns.
    name : str
        Name of table or view to create.
    schema : schema class, optional
        Schema of table to create. Defaults to public.
    view : bool, optional
        Whether to create a view instead of a table. Defaults to False.

    Returns
    -------
    None

	gid	county	cfcc	landname	landpoly	gavprimary	geometry_a	geometry_l	geom
id
66	1	06095	H51	Honker Bay	0	66	0	0	3031303630303030323044303041303030303146303030...

	county	calc_area	calc_dist
parcel_id
1310619	cnc	940.179147	5.916508
1328890	cnc	2502.951388	8.538715
1329689	cnc	1531.733259	2.913542
1361759	cnc	39137.841912	1.766306
1351096	cnc	152237.591686	0.000000
1314915	cnc	1263.207494	0.000000
1314939	cnc	1318.958161	0.000000
1314951	cnc	5152.302568	0.000000
1329426	cnc	1623.131364	0.000000
1329526	cnc	1043.805765	0.000000
1329539	cnc	580.638017	0.000000
1329613	cnc	1968.318081	0.000000
1329720	cnc	1324.626665	0.000000
1329721	cnc	1644.098390	0.000000
1330034	cnc	1236.185722	0.000000
1330525	cnc	17893.177556	0.000000