In [1]:
import time
import collections
import sys
sys.path.append('/Users/jay/github/pysal/')
import pysal as ps
%load_ext line_profiler
#Just to confirm that I am on the newest version
print ps.version
In [22]:
def queen(fname):
#ta = time.time()
#t1 = time.time()
#fname = '10000_poly.shp'
shpFileObject = ps.open(fname)
if shpFileObject.type != ps.cg.Polygon:
print "FAILED"
numPoly = len(shpFileObject)
#t2 = time.time()
#print "Opening took {} seconds.".format(t2-t1)
#t1 = time.time()
w = collections.defaultdict(set)
#w = {}
#for i in range(numPoly):
#w[i] = set()
#t2 = time.time()
#print "Preallocating the W took {} seconds.".format(t2-t1)
#t1 = time.time()
geoms = collections.deque()
offsets = collections.deque() #len(offsets) == len(geoms)
c = 0 #PolyID Counter
for n in range(numPoly):
verts = shpFileObject.get(n).vertices
offsets.extend( [c] * len(verts) )
geoms.extend(verts)
c += 1
#t2 = time.time()
#print "Extending took {} seconds.".format(t2-t1)
#t1 = time.time()
items = collections.defaultdict(set)
for i, vertex in enumerate(geoms):
items[vertex].add(offsets[i])
shared_vertices = []
for item, location in items.iteritems():
if len(location) > 1:
shared_vertices.append(location)
#t2 = time.time()
#print "Shared vertex identification took {} seconds".format(t2-t1)
#Shared Vertices is a list, by index of those polys that share a vertex.
#t1 = time.time()
#print shared_vertices
for vert_set in shared_vertices:
for v in vert_set:
w[v] = w[v] | vert_set
#try:
#w[v].remove(v)
#except:
#pass
#t2 = time.time()
#tb = time.time()
#print "Total processing time was {} seconds.".format(tb-ta)
return w
w = queen('2500_poly.shp')
In [3]:
%lprun -f queen queen('2500_poly.shp')
This is what Serge and I chatted about last week. We make two passes over the data.
Questions:
In [23]:
def onestep(fname):
shpFileObject = ps.open(fname)
if shpFileObject.type != ps.cg.Polygon:
print "FAILED"
numPoly = len(shpFileObject)
vertices = collections.defaultdict(set)
for i, s in enumerate(shpFileObject):
newvertices = s.vertices[:-1]
for v in newvertices:
vertices[v].add(i)
w = collections.defaultdict(set)
for neighbors in vertices.itervalues():
for neighbor in neighbors:
w[neighbor] = w[neighbor] | neighbors
return w
neww = onestep('2500_poly.shp')
In [25]:
assert w == neww
print w[0]
print neww[0]
In [19]:
%lprun -f onestep onestep('2500_poly.shp')
In [3]:
import pysal as ps
t1 = time.time()
w = ps.queen_from_shapefile(fname)
t2 = time.time()
print t2-t1
In [41]:
#Check to see that these two dicts are identical
for k, v in w.neighbors.iteritems():
if sorted(improved_w[k]) != sorted(v):
print improved_w[k], v
Rook
In [43]:
def rook(fname):
ta = time.time()
t1 = time.time()
#fname = '2500_poly.shp'
shpFileObject = ps.open(fname)
if shpFileObject.type != ps.cg.Polygon:
print "FAILED"
numPoly = len(shpFileObject)
t2 = time.time()
#print "Opening took {} seconds.".format(t2-t1)
t1 = time.time()
w = {}
for i in range(numPoly):
w[i] = set()
t2 = time.time()
#print "Preallocating the W took {} seconds.".format(t2-t1)
t1 = time.time()
geoms = []
offsets = [] #len(offsets) == len(geoms)
c = 0 #PolyID Counter
for n in range(numPoly):
verts = shpFileObject.get(n).vertices
for v in range(len(verts)-1):
geoms.append(tuple(sorted([verts[v], verts[v+1]])))
offsets += [c] * (len(verts)-1)
c += 1
t2 = time.time()
#print "Reading took {} seconds.".format(t2-t1)
t1 = time.time()
items = collections.defaultdict(set)
for i, item in enumerate(geoms):
items[item].add(offsets[i])
shared_vertices = []
for item, location in items.iteritems():
if len(location) > 1:
shared_vertices.append(location)
t2 = time.time()
#print "Shared vertex identification took {} seconds".format(t2-t1)
#Shared Vertices is a list, by index of those polys that share a vertex.
t1 = time.time()
for vert_set in shared_vertices:
for v in vert_set:
w[v] = w[v] | vert_set
try:
w[v].remove(v)
except:
pass
t2 = time.time()
t1 = time.time()
for k, v in w.iteritems():
w[k] = list(v)
t2 = time.time()
tb = time.time()
print "Total processing time was {} seconds.".format(tb-ta)
In [33]:
import pysal as ps
t1 = time.time()
w = ps.rook_from_shapefile(fname)
t2 = time.time()
print t2-t1
print w[2499]
In [34]:
#Check to see that these two dicts are identical
for k, v in w.neighbors.iteritems():
if sorted(improved_w[k]) != sorted(v):
print improved_w[k], v
In [18]:
fnames = ['TestData/62500_poly.shp']
for f in fnames:
#Open the data source once to get the file pointer
touch = open(f)
touch.close()
print f
queen(f)
t1 = time.time()
ps.queen_from_shapefile(f)
t2 = time.time()
print "Original queen took {} seconds".format(t2 - t1)
rook(f)
t1 = time.time()
ps.rook_from_shapefile(f)
t2 = time.time()
print "Original rook took {} seconds".format(t2 - t1)
print
In [1]:
queen_fast = np.array([0.45458817482,
1.27110695839,
3.097905159,
5.1691391468,
6.72036194801,
7.85625100136,
11.6823840141,
12.812087059])
queen_slow = np.array([1.07005596161,
4.87641596794,
16.385130167,
37.7573301792,
60.615033865,
80.6158120632,
158.111407995,
191.939512968])
rook_fast = np.array([0.727967023849,
1.79104304314,
3.43500900269,
6.94867897034,
8.01557397842,
9.91741108894,
14.2835550308,
16.4100689888])
rook_slow = np.array([1.16798591614,
6.33092308044,
18.7772500515,
44.9458069801,
60.5921809673,
88.5189139843,
162.452570915,
203.032716036])
x = np.array([2500,10000,22500,40000,50000,62500,90000,100000])
In [9]:
plot(x, queen_fast, label='Fast')
plot(x, queen_slow, label='Slow')
xlabel('Num. Polys')
ylabel('Time (sec.)')
title('Queen')
legend(loc=2)
grid()
In [11]:
plot(x, rook_fast, label='Fast')
plot(x, rook_slow, label='Slow')
xlabel('Num. Polys')
ylabel('Time (sec.)')
title('Rook')
legend(loc=2)
grid()
In [14]:
plot(x, queen_slow / queen_fast, label='Queen Speedup')
plot(x, rook_slow / rook_fast, label='Rook Speedup')
xlabel('Num. Polys')
ylabel('Speedup')
title('Rook')
legend(loc=4)
grid()
In [ ]: