In [1]:
import sys, os
sys.path.insert(0, os.path.join("..", "..", ".."))
In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.collections
import geopandas as gpd
import open_cp.network
import open_cp.sources.chicago
import open_cp.geometry
In [3]:
#data_path = os.path.join("/media", "disk", "Data")
data_path = os.path.join("..", "..", "..", "..", "..", "..", "Data")
open_cp.sources.chicago.set_data_directory(data_path)
We load this into a geoPandas data frame, and then convert each geometry LINESTRING into nodes and edges in a graph.
In [4]:
#tiger_path = os.path.join("/media", "disk", "TIGER Data")
tiger_path = os.path.join("..", "..", "..", "..", "..", "..", "Data", "TIGER Data")
filename = os.path.join(tiger_path, "tl_2016_17031_roads")
tiger_frame = gpd.GeoDataFrame.from_file(filename)
chicago = tiger_frame.to_crs({"init":"epsg:3528"})
chicago.head()
Out[4]:
In [5]:
south_side = open_cp.sources.chicago.get_side("South")
mask = chicago.geometry.map(lambda x : x.intersects(south_side))
frame = chicago[mask]
frame.head()
Out[5]:
In [6]:
all_nodes = []
for geo in frame.geometry:
for pt in geo.coords:
all_nodes.append(pt)
b = open_cp.network.PlanarGraphNodeOneShot(all_nodes)
for geo in frame.geometry:
path = list(geo.coords)
b.add_path(path)
b.remove_duplicate_edges()
graph = b.build()
In [7]:
reduced = open_cp.network.simple_reduce_graph(graph)
graph.number_edges, reduced.number_edges
Out[7]:
This (freely available) file is geocoded so that each event lies on the midpoint of a road.
In [8]:
filename = open_cp.sources.chicago.get_default_filename()
timed_points = open_cp.sources.chicago.load(filename, ["BURGLARY"])
timed_points.number_data_points
Out[8]:
In [9]:
timed_points = open_cp.geometry.intersect_timed_points(timed_points, south_side)
timed_points.number_data_points
Out[9]:
In [10]:
fig, ax = plt.subplots(figsize=(12,12))
lc = matplotlib.collections.LineCollection(graph.as_lines(), color="black", linewidth=0.5)
ax.add_collection(lc)
ax.scatter(timed_points.xcoords, timed_points.ycoords)
xmin, ymin, xmax, ymax = *timed_points.bounding_box.min, *timed_points.bounding_box.max
xd, yd = xmax - xmin, ymax - ymin
ax.set(xlim=(xmin-xd/20, xmax+xd/20), ylim=(ymin-yd/20, ymax+yd/20))
None
In [11]:
fig, axes = plt.subplots(ncols=2, figsize=(18,8))
for ax in axes:
lc = matplotlib.collections.LineCollection(graph.as_lines(), color="black", linewidth=0.5)
ax.add_collection(lc)
ax.scatter(timed_points.xcoords, timed_points.ycoords)
axes[0].set(xlim=[358000, 360000], ylim=[570000, 572000])
axes[1].set(xlim=[362000, 364000], ylim=[565000, 567000])
Out[11]:
In [12]:
import pickle, lzma
with lzma.open("input.pic.xz", "wb") as f:
pickle.dump(timed_points, f)
with open("input.graph", "wb") as f:
f.write(graph.dump_bytes())
In [13]:
filename = os.path.join(data_path, "chicago_all_old.csv")
timed_points = open_cp.sources.chicago.load(filename, ["BURGLARY"], type="all")
timed_points.number_data_points
Out[13]:
In [14]:
timed_points = open_cp.geometry.intersect_timed_points(timed_points, south_side)
timed_points.number_data_points
Out[14]:
In [15]:
with lzma.open("input_old.pic.xz", "wb") as f:
pickle.dump(timed_points, f)
with open("input_old.graph", "wb") as f:
f.write(graph.dump_bytes())
In [ ]: