This is to generate random points w/ in a polygon.

The RandomPointsinPolygon function is from: http://trac.osgeo.org/postgis/wiki/UserWikiRandomPoint

The trick is to call this on each polygon. Below is how I did it to generate a job point w/ in each census block based on the job counts from the lodes7 data. It probably could be written better, but if you need more markdown / info / whatever, let me know.


In [ ]:
'''
CREATE OR REPLACE FUNCTION RandomPointsInPolygon(geom geometry, num_points integer)
  RETURNS SETOF geometry AS
$BODY$DECLARE
  target_proportion numeric;
  n_ret integer := 0;
  loops integer := 0;
  x_min float8;
  y_min float8;
  x_max float8;
  y_max float8;
  srid integer;
  rpoint geometry;
BEGIN
  -- Get envelope and SRID of source polygon
  SELECT ST_XMin(geom), ST_YMin(geom), ST_XMax(geom), ST_YMax(geom), ST_SRID(geom)
    INTO x_min, y_min, x_max, y_max, srid;
  -- Get the area proportion of envelope size to determine if a
  -- result can be returned in a reasonable amount of time
  SELECT ST_Area(geom)/ST_Area(ST_Envelope(geom)) INTO target_proportion;
  RAISE DEBUG 'geom: SRID %, NumGeometries %, NPoints %, area proportion within envelope %',
                srid, ST_NumGeometries(geom), ST_NPoints(geom),
                round(100.0*target_proportion, 2) || '%';
  
  WHILE n_ret < num_points LOOP
    loops := loops + 1;
    SELECT ST_SetSRID(ST_MakePoint(random()*(x_max - x_min) + x_min,
                                   random()*(y_max - y_min) + y_min),
                      srid) INTO rpoint;
    IF ST_Contains(geom, rpoint) THEN
      n_ret := n_ret + 1;
      RETURN NEXT rpoint;
    END IF;
  END LOOP;
  RAISE DEBUG 'determined in % loops (% efficiency)', loops, round(100.0*num_points/loops, 2) || '%';
END$BODY$
  LANGUAGE plpgsql VOLATILE
  COST 100
  ROWS 1000;
ALTER FUNCTION RandomPointsInPolygon(geometry, integer) OWNER TO postgres;
'''

In [ ]:
'''
with a as(
select a.parcel_id, a.parcel_sqft, a.centroid_x, a.centroid_y, b.the_geom from parcels a join parcels_spatial b on a.parcel_id = b.parcel_id
), b as(
SELECT parcel_id, RandomPointsInPolygon(the_geom, 10) as randpoint
FROM a WHERE (parcel_sqft >= 217800) and (parcel_sqft < 1406800)
), c as(
SELECT parcel_id, RandomPointsInPolygon(the_geom, 100) as randpoint
FROM a WHERE (parcel_sqft >= 1406800)
), d as (
select parcel_id, round(ST_X(ST_Centroid(the_geom))) as x, round(ST_Y(ST_Centroid(the_geom))) as y  from a where parcel_sqft < 217800
), e as (
select parcel_id, round(ST_X(randpoint)) as x, round(ST_Y(randpoint)) as y from b
), f as (
select parcel_id, round(ST_X(randpoint)) as x, round(ST_Y(randpoint)) as y from c
), g as (
select * from d
UNION ALL
select * from e
UNION ALL
select * from f
)
select * into parcel_coords from g
'''

In [ ]:
import pandas as pd
import numpy as np
import psycopg2

conn_string = "host='localhost' dbname='census' user='ntapia' password='' port=5432"
conn=psycopg2.connect(conn_string)
cur = conn.cursor()

In [ ]:
create_function_query = """CREATE OR REPLACE FUNCTION RandomPointsInPolygon(geom geometry, num_points integer)
  RETURNS SETOF geometry AS
$BODY$DECLARE
  target_proportion numeric;
  n_ret integer := 0;
  loops integer := 0;
  x_min float8;
  y_min float8;
  x_max float8;
  y_max float8;
  srid integer;
  rpoint geometry;
BEGIN
  -- Get envelope and SRID of source polygon
  SELECT ST_XMin(geom), ST_YMin(geom), ST_XMax(geom), ST_YMax(geom), ST_SRID(geom)
    INTO x_min, y_min, x_max, y_max, srid;
  -- Get the area proportion of envelope size to determine if a
  -- result can be returned in a reasonable amount of time
  SELECT ST_Area(geom)/ST_Area(ST_Envelope(geom)) INTO target_proportion;
  RAISE DEBUG 'geom: SRID %, NumGeometries %, NPoints %, area proportion within envelope %',
                srid, ST_NumGeometries(geom), ST_NPoints(geom),
                round(100.0*target_proportion, 2) || '%';
  IF target_proportion < 0.0001 THEN
    RAISE EXCEPTION 'Target area proportion of geometry is too low (%)', 
                    100.0*target_proportion || '%';
  END IF;
  RAISE DEBUG 'bounds: % % % %', x_min, y_min, x_max, y_max;
  
  WHILE n_ret < num_points LOOP
    loops := loops + 1;
    SELECT ST_SetSRID(ST_MakePoint(random()*(x_max - x_min) + x_min,
                                   random()*(y_max - y_min) + y_min),
                      srid) INTO rpoint;
    IF ST_Contains(geom, rpoint) THEN
      n_ret := n_ret + 1;
      RETURN NEXT rpoint;
    END IF;
  END LOOP;
  RAISE DEBUG 'determined in % loops (% efficiency)', loops, round(100.0*num_points/loops, 2) || '%';
END$BODY$
  LANGUAGE plpgsql VOLATILE
  COST 100
  ROWS 1000;
ALTER FUNCTION RandomPointsInPolygon(geometry, integer) OWNER TO postgres;"""



cur.execute(generate_points_query)
conn.commit()

In [ ]:
#create a points table with id, job type, and geom

cur.execute("""
drop table if exists sf_jobs;
create table sf_jobs (blockid10 varchar(17), job_type varchar(10), the_geom geometry);""")
conn.commit()

#iterate cell by cell, check if value > 0, then create points for appropriate census block with the right job 
#then append those to the created table

#this is for the count of each job type. you probably won't need this for households
columns = ['cns01','cns02','cns03','cns04','cns05','cns06','cns07','cns08',
           'cns09','cns10','cns11','cns12','cns13','cns14','cns15','cns16',
           'cns17','cns18','cns19','cns20']

#maybe try 'rows = range(0, len(table))'
rows = range(0,89196)
for r in rows:
    for i in columns:
        if lodes_test2[i][r] > 0:
            block = str(lodes_test2['blockid10'][r])
            job = i
            job_x_block = (i + 'x' + block)
            num_job =  int(lodes_test2[i][r])
            cur.execute("""
DROP TABLE IF EXISTS %s;

CREATE TABLE %s 
(blockid10 varchar(17) DEFAULT '%s',
job_type varchar(10) DEFAULT '%s',
the_geom geometry);

INSERT INTO %s(the_geom)
SELECT RandomPointsInPolygon(geom, %s)
FROM sf_blocks WHERE geoid10 = '%s';""" % (job_x_block,\
                                               job_x_block,\
                                               block,\
                                               job,\
                                               job_x_block,\
                                               num_job,\
                                               block))
            conn.commit()
            
            cur.execute("""
            insert into sf_jobs select * from %s;""" % (job_x_block))
            conn.commit()

            cur.execute("""
            drop table if exists %s""" % job_x_block)
            conn.commit()
            
#if a gid is needed....            
#cur.execute("""
#alter table alameda_jobs 
#add column gid serial""")
#conn.commit()

print ('done')