The RandomPointsinPolygon function is from: http://trac.osgeo.org/postgis/wiki/UserWikiRandomPoint
The trick is to call this on each polygon. Below is how I did it to generate a job point w/ in each census block based on the job counts from the lodes7 data. It probably could be written better, but if you need more markdown / info / whatever, let me know.
In [ ]:
'''
CREATE OR REPLACE FUNCTION RandomPointsInPolygon(geom geometry, num_points integer)
RETURNS SETOF geometry AS
$BODY$DECLARE
target_proportion numeric;
n_ret integer := 0;
loops integer := 0;
x_min float8;
y_min float8;
x_max float8;
y_max float8;
srid integer;
rpoint geometry;
BEGIN
-- Get envelope and SRID of source polygon
SELECT ST_XMin(geom), ST_YMin(geom), ST_XMax(geom), ST_YMax(geom), ST_SRID(geom)
INTO x_min, y_min, x_max, y_max, srid;
-- Get the area proportion of envelope size to determine if a
-- result can be returned in a reasonable amount of time
SELECT ST_Area(geom)/ST_Area(ST_Envelope(geom)) INTO target_proportion;
RAISE DEBUG 'geom: SRID %, NumGeometries %, NPoints %, area proportion within envelope %',
srid, ST_NumGeometries(geom), ST_NPoints(geom),
round(100.0*target_proportion, 2) || '%';
WHILE n_ret < num_points LOOP
loops := loops + 1;
SELECT ST_SetSRID(ST_MakePoint(random()*(x_max - x_min) + x_min,
random()*(y_max - y_min) + y_min),
srid) INTO rpoint;
IF ST_Contains(geom, rpoint) THEN
n_ret := n_ret + 1;
RETURN NEXT rpoint;
END IF;
END LOOP;
RAISE DEBUG 'determined in % loops (% efficiency)', loops, round(100.0*num_points/loops, 2) || '%';
END$BODY$
LANGUAGE plpgsql VOLATILE
COST 100
ROWS 1000;
ALTER FUNCTION RandomPointsInPolygon(geometry, integer) OWNER TO postgres;
'''
In [ ]:
'''
with a as(
select a.parcel_id, a.parcel_sqft, a.centroid_x, a.centroid_y, b.the_geom from parcels a join parcels_spatial b on a.parcel_id = b.parcel_id
), b as(
SELECT parcel_id, RandomPointsInPolygon(the_geom, 10) as randpoint
FROM a WHERE (parcel_sqft >= 217800) and (parcel_sqft < 1406800)
), c as(
SELECT parcel_id, RandomPointsInPolygon(the_geom, 100) as randpoint
FROM a WHERE (parcel_sqft >= 1406800)
), d as (
select parcel_id, round(ST_X(ST_Centroid(the_geom))) as x, round(ST_Y(ST_Centroid(the_geom))) as y from a where parcel_sqft < 217800
), e as (
select parcel_id, round(ST_X(randpoint)) as x, round(ST_Y(randpoint)) as y from b
), f as (
select parcel_id, round(ST_X(randpoint)) as x, round(ST_Y(randpoint)) as y from c
), g as (
select * from d
UNION ALL
select * from e
UNION ALL
select * from f
)
select * into parcel_coords from g
'''
In [ ]:
import pandas as pd
import numpy as np
import psycopg2
conn_string = "host='localhost' dbname='census' user='ntapia' password='' port=5432"
conn=psycopg2.connect(conn_string)
cur = conn.cursor()
In [ ]:
create_function_query = """CREATE OR REPLACE FUNCTION RandomPointsInPolygon(geom geometry, num_points integer)
RETURNS SETOF geometry AS
$BODY$DECLARE
target_proportion numeric;
n_ret integer := 0;
loops integer := 0;
x_min float8;
y_min float8;
x_max float8;
y_max float8;
srid integer;
rpoint geometry;
BEGIN
-- Get envelope and SRID of source polygon
SELECT ST_XMin(geom), ST_YMin(geom), ST_XMax(geom), ST_YMax(geom), ST_SRID(geom)
INTO x_min, y_min, x_max, y_max, srid;
-- Get the area proportion of envelope size to determine if a
-- result can be returned in a reasonable amount of time
SELECT ST_Area(geom)/ST_Area(ST_Envelope(geom)) INTO target_proportion;
RAISE DEBUG 'geom: SRID %, NumGeometries %, NPoints %, area proportion within envelope %',
srid, ST_NumGeometries(geom), ST_NPoints(geom),
round(100.0*target_proportion, 2) || '%';
IF target_proportion < 0.0001 THEN
RAISE EXCEPTION 'Target area proportion of geometry is too low (%)',
100.0*target_proportion || '%';
END IF;
RAISE DEBUG 'bounds: % % % %', x_min, y_min, x_max, y_max;
WHILE n_ret < num_points LOOP
loops := loops + 1;
SELECT ST_SetSRID(ST_MakePoint(random()*(x_max - x_min) + x_min,
random()*(y_max - y_min) + y_min),
srid) INTO rpoint;
IF ST_Contains(geom, rpoint) THEN
n_ret := n_ret + 1;
RETURN NEXT rpoint;
END IF;
END LOOP;
RAISE DEBUG 'determined in % loops (% efficiency)', loops, round(100.0*num_points/loops, 2) || '%';
END$BODY$
LANGUAGE plpgsql VOLATILE
COST 100
ROWS 1000;
ALTER FUNCTION RandomPointsInPolygon(geometry, integer) OWNER TO postgres;"""
cur.execute(generate_points_query)
conn.commit()
In [ ]:
#create a points table with id, job type, and geom
cur.execute("""
drop table if exists sf_jobs;
create table sf_jobs (blockid10 varchar(17), job_type varchar(10), the_geom geometry);""")
conn.commit()
#iterate cell by cell, check if value > 0, then create points for appropriate census block with the right job
#then append those to the created table
#this is for the count of each job type. you probably won't need this for households
columns = ['cns01','cns02','cns03','cns04','cns05','cns06','cns07','cns08',
'cns09','cns10','cns11','cns12','cns13','cns14','cns15','cns16',
'cns17','cns18','cns19','cns20']
#maybe try 'rows = range(0, len(table))'
rows = range(0,89196)
for r in rows:
for i in columns:
if lodes_test2[i][r] > 0:
block = str(lodes_test2['blockid10'][r])
job = i
job_x_block = (i + 'x' + block)
num_job = int(lodes_test2[i][r])
cur.execute("""
DROP TABLE IF EXISTS %s;
CREATE TABLE %s
(blockid10 varchar(17) DEFAULT '%s',
job_type varchar(10) DEFAULT '%s',
the_geom geometry);
INSERT INTO %s(the_geom)
SELECT RandomPointsInPolygon(geom, %s)
FROM sf_blocks WHERE geoid10 = '%s';""" % (job_x_block,\
job_x_block,\
block,\
job,\
job_x_block,\
num_job,\
block))
conn.commit()
cur.execute("""
insert into sf_jobs select * from %s;""" % (job_x_block))
conn.commit()
cur.execute("""
drop table if exists %s""" % job_x_block)
conn.commit()
#if a gid is needed....
#cur.execute("""
#alter table alameda_jobs
#add column gid serial""")
#conn.commit()
print ('done')