Using our favourite source, Chicago: https://data.cityofchicago.org/Public-Safety/Crimes-2001-to-present/ijzp-q8t2
Geometry from https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Community-Areas-current-/cauq-8yn6
In [1]:
import os, csv, lzma
import numpy as np
import open_cp.sources.chicago
import geopandas as gpd
import pyproj
import shapely.geometry
In [2]:
# Point the open_cp Chicago helpers at the local data directory, then fetch
# the geometry they return for the "South" side.
# (Alternative data location: os.path.join("/media", "OTHERDATA").)
chicago = open_cp.sources.chicago
datadir = os.path.join("..", "..", "..", "..", "Data")
chicago.set_data_directory(datadir)
polygon = chicago.get_side("South")
In [3]:
# Wrap the polygon in a one-row GeoDataFrame named "South Side" and tag it
# with the EPSG:2790 coordinate reference system.
frame = gpd.GeoDataFrame(
    {"name": ["South Side"]},
    geometry=[polygon],
    crs={"init": "epsg:2790"},
)
frame  # last expression of the cell: displayed by the notebook
Out[3]:
In [4]:
# Export the one-row frame to disk under the name "SouthSide".
# NOTE(review): no driver is passed, so geopandas' default output format is
# used -- confirm which format (and file/directory layout) that produces.
frame.to_file("SouthSide")
In [6]:
def gen(filename=None):
    """Yield the rows of an xz-compressed CSV file.

    Parameters
    ----------
    filename : str or path-like, optional
        Path of the ``.csv.xz`` file to read.  When omitted, the file
        "chicago_all_dec2017.csv.xz" inside the module-level ``datadir``
        is used.

    Yields
    ------
    list of str
        One list per CSV row, in file order (the first row yielded is
        whatever the first line of the file contains).
    """
    if filename is None:
        filename = os.path.join(datadir, "chicago_all_dec2017.csv.xz")
    # newline="" disables newline translation so the csv module sees the raw
    # line endings; otherwise "\r\n" inside a quoted field would be rewritten.
    with lzma.open(filename, "rt", newline="") as f:
        yield from csv.reader(f)
# Peek at the first two rows of the file to inspect the column layout.
rows = gen()
for _ in range(2):
    print(next(rows))
In [ ]:
# Collect every row dated 2016 whose projected location intersects `polygon`.
proj = pyproj.Proj({"init":"epsg:2790"})
rows = gen()
header = next(rows)  # consume the first row so only data rows are filtered
choices = []
for row in rows:
    # Skip rows with a missing coordinate.  Test by value, not identity:
    # `row[19] is ""` relies on string interning and is a SyntaxWarning on
    # Python 3.8+.  Both columns are checked because both go through float().
    if not row[19] or not row[20]:
        continue
    # Characters 6-9 of column 2 are compared with the wanted year.
    # NOTE(review): presumably an "MM/DD/YYYY ..." timestamp -- confirm.
    if row[2][6:10] != "2016":
        continue
    # NOTE(review): columns 20 / 19 are presumably longitude / latitude,
    # matching the (lon, lat) argument order of the projection -- confirm.
    x, y = proj(float(row[20]), float(row[19]))
    pt = shapely.geometry.Point(x, y)
    if polygon.intersects(pt):
        choices.append(row)
In [ ]:
# Build `out`: the header row followed by a random sample of up to 1000 of the
# selected rows, in their original order, reduced to a few columns.
columns = (1, 2, 3, 5, 19, 20)  # column indices copied into the output

# Only the first row (the header) is needed; close the generator explicitly
# so the compressed file is released immediately rather than at garbage
# collection.
header_rows = gen()
header_row = next(header_rows)
header_rows.close()

# Sampling without replacement fails if more items are requested than exist,
# so cap the sample at the number of available rows.
sample_size = min(1000, len(choices))
want = np.sort(np.random.choice(len(choices), sample_size, replace=False))

out = [[header_row[c] for c in columns]]
# `want` is sorted, so indexing directly keeps the original row order while
# avoiding a linear `i in want` scan of the array for every row.
out.extend([choices[i][c] for c in columns] for i in want)
In [ ]:
# Write every row of `out` to "example.csv".  newline="" leaves line
# termination to the csv writer.
with open("example.csv", "w", newline="") as handle:
    writer = csv.writer(handle)
    writer.writerows(out)
In [ ]: