In [1]:
import sys, os
sys.path.insert(0, os.path.join("..", ".."))
In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import geopandas as gpd
import os
import descartes
import numpy as np
import open_cp.network
import open_cp.plot
import matplotlib.collections
In [4]:
ordnancedir = os.path.join("/media", "disk", "OS_OpenMap")
#ordnancedir = os.path.join("..", "..", "..", "..", "..", "Data", "OS_OpenMap")
openroadsdir = os.path.join(ordnancedir, "oproad_essh_gz", "data")
file = "SE_RoadLink.shp"
se_frame = gpd.GeoDataFrame.from_file(os.path.join(openroadsdir, file))
se_frame.head()
Out[4]:
We notice that each edge has a UUID-like identifier for the start and end node.
However, we find that some nodes are very, very close together.
So it is safe to use the "graph structure" as is, without processing.
In [5]:
node_lookup = dict()
for endid, line in zip(se_frame.endNode, se_frame.geometry):
x,y,z = list(line.coords)[-1]
if endid in node_lookup:
if not (x,y) == node_lookup[endid]:
print(endid, node_lookup[endid], x, y)
raise AssertionError()
node_lookup[endid] = (x,y)
for startid, line in zip(se_frame.startNode, se_frame.geometry):
x,y,z = list(line.coords)[0]
if startid in node_lookup:
if not (x,y) == node_lookup[startid]:
print(startid, node_lookup[startid], x, y)
raise AssertionError()
node_lookup[startid] = (x,y)
In [6]:
node_lookup = dict()
for endid, line in zip(se_frame.endNode, se_frame.geometry):
x,y,z = list(line.coords)[-1]
if (x,y) in node_lookup:
if not endid == node_lookup[(x,y)]:
print(endid, node_lookup[(x,y)], x, y)
raise AssertionError()
node_lookup[(x,y)] = endid
for startid, line in zip(se_frame.startNode, se_frame.geometry):
x,y,z = list(line.coords)[0]
if (x,y) in node_lookup:
if not startid == node_lookup[(x,y)]:
print(startid, node_lookup[(x,y)], x, y)
raise AssertionError()
node_lookup[(x,y)] = startid
In [7]:
int_nodes = set()
end = False
for i, line in enumerate(se_frame.geometry):
lines = list(line.coords)[1:-1]
for x,y,z in lines:
if (x,y) in int_nodes:
print(i, "-->", x, y)
end = True
int_nodes.add((x,y))
if end:
break
In [8]:
se_frame.ix[444]
Out[8]:
In [9]:
b = open_cp.network.PlanarGraphGeoBuilder()
for line in se_frame.geometry:
b.add_path(line.coords)
In [10]:
graph = b.build()
In [11]:
fig, ax = plt.subplots(figsize=(12,12))
lc = matplotlib.collections.LineCollection(graph.as_lines(), color="black", linewidth=0.5)
ax.add_collection(lc)
nodes = list(graph.vertices.values())
nodes = np.asarray(nodes)
ax.scatter(nodes[:,0], nodes[:,1])
xc, yc = 493875.67, 429181.32
si = 1000
ax.set(xlim=[xc - si, xc + si], ylim=[yc - si, yc + si])
None
In [12]:
b = graph.dump_bytes()
with open("OS_Yorkshire.graph", "wb") as f:
f.write(b)
Here we have a lot less to go on: basically just geometry and a "name" for each edge.
What could go wrong if we form a graph in the same way:
In [13]:
tiger_path = os.path.join("/media", "disk", "TIGER Data")
#iger_path = os.path.join("..", "..", "..", "..", "..", "Data", "TIGER Data")
filename = os.path.join(tiger_path, "tl_2016_17031_roads")
tiger_frame = gpd.GeoDataFrame.from_file(filename)
chicago = tiger_frame.to_crs({"init":"epsg:3528"})
chicago.head()
Out[13]:
In [14]:
import pyproj
proj = pyproj.Proj({"init":"epsg:3528"})
In [15]:
start_end_nodes = []
for geo in chicago.geometry:
lines = list(geo.coords)
start_end_nodes.append(lines[0])
start_end_nodes.append(lines[-1])
start_end_nodes = np.asarray(start_end_nodes)
start_end_nodes.shape
Out[15]:
In [16]:
dists = []
odd_points = []
for i, pt in enumerate(start_end_nodes):
diffs = start_end_nodes[i+1:, :] - pt[None,:]
distsq = diffs[:,0]**2 + diffs[:,1]**2
mask = (distsq < 1) & (distsq > 0)
dists.extend(distsq[mask])
if np.any(mask) > 0:
for j, m in enumerate(mask):
if m:
odd_points.append((i,j+i+1))
if len(dists) > 1000000:
raise Bob
In [17]:
# We looked at those cases where the distance was <1, so these small numbers show a clear "gap"
# (and are probably symptomatic of data errors due to rounding in the input data.)
min(dists), max(dists)
Out[17]:
Look at one example to see that the lines should indeed join.
In [18]:
odd_points[0]
Out[18]:
In [19]:
# 50 == start point of 25
# 110545 == end point of 110544//2
chicago.ix[[25, 110544//2]]
Out[19]:
In [20]:
list(chicago.ix[25].geometry.coords)[0], list(chicago.ix[110544//2].geometry.coords)[-1]
Out[20]:
In [21]:
# Unprojected, shows an input rounding error
list(tiger_frame.ix[25].geometry.coords)[0], list(tiger_frame.ix[110544//2].geometry.coords)[-1]
Out[21]:
In [22]:
xc, yc = list(chicago.ix[25].geometry.coords)[0]
print("As lon lat:", proj(xc, yc, inverse=True))
fig, ax = plt.subplots(figsize=(8,8))
lines = open_cp.plot.lines_from_geometry(chicago.geometry)
lc = matplotlib.collections.LineCollection(lines, color="black", linewidth=0.5)
ax.add_collection(lc)
nodes = np.asarray(start_end_nodes)
ax.scatter(start_end_nodes[:,0], start_end_nodes[:,1])
si = 100
ax.set(xlim=[xc - si, xc + si], ylim=[yc - si, yc + si])
None
In [ ]:
nodes, all_nodes = [], []
for geo in chicago.geometry:
for pt in geo.coords:
all_nodes.append(pt)
nodes = list(set(all_nodes))
nodes = np.asarray(nodes)
all_nodes = np.asarray(all_nodes)
nodes.shape, all_nodes.shape
In [ ]:
dists = []
odd_points = []
for i, pt in enumerate(nodes):
diffs = nodes[i+1:, :] - pt[None,:]
distsq = diffs[:,0]**2 + diffs[:,1]**2
assert not np.any(distsq==0)
mask = (distsq < 1) #& (distsq > 0)
dists.extend(distsq[mask])
if np.any(mask) > 0:
for j, m in enumerate(mask):
if m:
odd_points.append((i,j+i+1))
if len(dists) > 1000000:
raise Bob
In [ ]:
dists = np.asarray(dists)
small = dists[dists < 0.5]
print(min(small), max(small))
large = dists[dists >= 0.5]
print(min(large), max(large))
In [ ]:
xc, yc = list(chicago.ix[25].geometry.coords)[0]
print("As lon lat:", proj(xc, yc, inverse=True))
fig, ax = plt.subplots(figsize=(8,8))
lines = open_cp.plot.lines_from_geometry(chicago.geometry)
lc = matplotlib.collections.LineCollection(lines, color="black", linewidth=0.5)
ax.add_collection(lc)
dx = np.random.random(all_nodes.shape[0]) * 2
dy = np.random.random(all_nodes.shape[0]) * 2
ax.scatter(all_nodes[:,0]+dx, all_nodes[:,1]+dy, alpha=1, marker="+")
si = 100
ax.set(xlim=[xc - si, xc + si], ylim=[yc - si, yc + si])
None
In [23]:
all_nodes = []
for geo in chicago.geometry:
for pt in geo.coords:
all_nodes.append(pt)
b = open_cp.network.PlanarGraphNodeOneShot(all_nodes)
for geo in chicago.geometry:
path = list(geo.coords)
b.add_path(path)
graph = b.build()
In [24]:
proj(*all_nodes[28720], inverse=True), proj(*all_nodes[627010], inverse=True)
Out[24]:
A bit of visual investigation with QGIS gives the following. We plot these with an offset: they are identical for time. I think this is due to the same physical road having two logical identities ("State Rte 19" and "Irving Park Rd") and so appearing in the data twice.
In [26]:
mask = (chicago["LINEARID"] == "1104469295862") | (chicago["LINEARID"] == "110380350397")
geo = chicago[mask].geometry
geo
Out[26]:
In [27]:
fig, ax = plt.subplots(figsize=(12,6))
x = np.asarray(list(geo[33248].coords))
lc = matplotlib.collections.LineCollection([x])
ax.add_collection(lc)
x = np.asarray(list(geo[1982].coords)) + np.array([0,100])
lc = matplotlib.collections.LineCollection([x])
ax.add_collection(lc)
xmin, ymin, xmax, ymax = chicago[mask].total_bounds
d = 500
ax.set(xlim=[xmin-d, xmax+d], ylim=[ymin-d, ymax+d])
None
In [28]:
b.remove_duplicate_edges()
graph = b.build()
In [29]:
fig, ax = plt.subplots(figsize=(12,12))
lc = matplotlib.collections.LineCollection(graph.as_lines(), color="black", linewidth=0.5)
ax.add_collection(lc)
xmin, ymin, xmax, ymax = chicago.total_bounds
xd, yd = xmax - xmin, ymax - ymin
ax.set(xlim=(xmin-xd/20, xmax+xd/20), ylim=(ymin-yd/20, ymax+yd/20))
None
In [30]:
b = graph.dump_bytes()
with open("chicago.graph", "wb") as f:
f.write(b)
In [31]:
tiger_path = os.path.join("/media", "disk", "TIGER Data")
#iger_path = os.path.join("..", "..", "..", "..", "..", "Data", "TIGER Data")
filename = os.path.join(tiger_path, "tl_2016_17031_edges")
tiger_frame = gpd.GeoDataFrame.from_file(filename)
tiger_frame = tiger_frame.to_crs({"init":"epsg:3528"})
chicago = gpd.GeoDataFrame(tiger_frame["FULLNAME"])
chicago.geometry = tiger_frame.geometry
chicago.crs = tiger_frame.crs
chicago.head()
Out[31]:
Look at start and end nodes. Again, we find that these do not exactly match.
In [32]:
start_end_nodes = []
for geo in chicago.geometry:
lines = list(geo.coords)
start_end_nodes.append(lines[0])
start_end_nodes.append(lines[-1])
start_end_nodes = np.asarray(start_end_nodes)
start_end_nodes.shape
Out[32]:
In [33]:
dists = []
odd_points = []
for i, pt in enumerate(start_end_nodes):
diffs = start_end_nodes[i+1:, :] - pt[None,:]
distsq = diffs[:,0]**2 + diffs[:,1]**2
mask = (distsq < 1) & (distsq > 0)
dists.extend(distsq[mask])
for j in np.arange(0, len(mask))[mask]:
odd_points.append((i,j+i+1))
if len(dists) > 1000000:
raise Bob
In [34]:
len(odd_points), min(dists), max(dists)
Out[34]:
In [35]:
odd_points[0]
Out[35]:
In [36]:
# 27 == end point of 27//2
# 153333 == end point of 153333//2
chicago.ix[[27//2, 153333//2]]
Out[36]:
In [37]:
list(chicago.geometry[13].coords)[-1], list(chicago.geometry[76666].coords)[-1]
Out[37]:
In [38]:
all_nodes = []
for geo in chicago.geometry:
for pt in geo.coords:
all_nodes.append(pt)
b = open_cp.network.PlanarGraphNodeOneShot(all_nodes)
for geo in chicago.geometry:
path = list(geo.coords)
b.add_path(path)
graph = b.build()
In [39]:
fig, ax = plt.subplots(figsize=(12,12))
lc = matplotlib.collections.LineCollection(graph.as_lines(), color="black", linewidth=0.5)
ax.add_collection(lc)
xmin, ymin, xmax, ymax = chicago.total_bounds
xd, yd = xmax - xmin, ymax - ymin
ax.set(xlim=(xmin-xd/20, xmax+xd/20), ylim=(ymin-yd/20, ymax+yd/20))
None
In [40]:
import shapely.geometry
geo = []
for (v1,v2) in graph.edges:
l = shapely.geometry.LineString([graph.vertices[v1], graph.vertices[v2]])
geo.append(l)
frame = gpd.GeoDataFrame({"edge" : list(range(len(graph.edges)))})
frame.geometry = geo
frame.crs = {"init":"epsg:3528"}
frame.head()
Out[40]:
In [41]:
frame.to_file("roads_to_graph")
In the above diagram, we wish to find $t$. We know that $v = b-a$, that $u\cdot v=0$, and that $x-a = tv + u$. Thus $$ (x-a) \cdot v = t\|v\|^2 \quad\implies\quad t = \|v\|^{-2} (x-a)\cdot v. $$
Rather than (tediously) explain how our code work, we just summarise it:
rtree Python package, then we put the vertices into an R-Treenumpy code.
In [42]:
with open("OS_Yorkshire.graph", "rb") as f:
graph = open_cp.network.PlanarGraph.from_bytes(f.read())
In [43]:
fig, ax = plt.subplots(figsize=(12,12))
lc = matplotlib.collections.LineCollection(graph.as_lines(), color="black", linewidth=0.5)
ax.add_collection(lc)
xc, yc = 493875.67, 429181.32
si = 1000
ax.set(xlim=[xc - si, xc + si], ylim=[yc - si, yc + si])
xcs = np.random.random(50) * si * 2 - si + xc
ycs = np.random.random(50) * si * 2 - si + yc
ax.scatter(xcs, ycs)
xxcs, yycs = [], []
for x, y in zip(xcs, ycs):
edge, t = graph.project_point_to_graph(x, y)
xx, yy = graph.edge_to_coords(*edge, t)
xxcs.append(xx)
yycs.append(yy)
ax.scatter(xxcs, yycs)
lines = [((x,y),(xx,yy)) for x,y,xx,yy in zip(xcs, ycs, xxcs, yycs)]
lc = matplotlib.collections.LineCollection(lines, color="blue")
ax.add_collection(lc)
Out[43]:
In [44]:
with open("chicago.graph", "rb") as f:
graph = open_cp.network.PlanarGraph.from_bytes(f.read())
In [45]:
fig, ax = plt.subplots(figsize=(12,12))
lc = matplotlib.collections.LineCollection(graph.as_lines(), color="black", linewidth=0.5)
ax.add_collection(lc)
x, y = 355000, 565000
size = 1000
ax.set(xlim=(x-size,x+size), ylim=(y-size,y+size))
xcs = np.random.random(50) * size * 2 - size + x
ycs = np.random.random(50) * size * 2 - size + y
ax.scatter(xcs, ycs)
xxcs, yycs = [], []
for x, y in zip(xcs, ycs):
edge, t = graph.project_point_to_graph(x, y)
xx, yy = graph.edge_to_coords(*edge, t)
xxcs.append(xx)
yycs.append(yy)
ax.scatter(xxcs, yycs)
lines = [((x,y),(xx,yy)) for x,y,xx,yy in zip(xcs, ycs, xxcs, yycs)]
lc = matplotlib.collections.LineCollection(lines, color="blue")
ax.add_collection(lc)
None
In [ ]: