In [1]:
import os, sys
# Make the repository root (two levels up) importable so `open_cp` resolves
# to the local checkout rather than any installed copy.
sys.path.insert(0, os.path.abspath(os.path.join("..", "..")))
In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import open_cp.scripted
import open_cp.scripted.analysis as analysis
In [3]:
# Load the predictions previously saved by the scripted "naive predictor" run.
loaded = open_cp.scripted.Loader("naive_preds.pic.xz")
In [4]:
# Time span covered by the loaded event data.
loaded.timed_points.time_range
Out[4]:
In [5]:
# Side-by-side overview: raw event locations (left) and gridded counts (right).
fig, axes = plt.subplots(ncols=2, figsize=(16,7))
analysis.plot_data_scatter(loaded, axes[0])
analysis.plot_data_grid(loaded, axes[1])
In [6]:
# Peek at one stored record; judging by the cells below, index 1 is the
# prediction date and index 2 the prediction object — confirm against Loader docs.
next(iter(loaded))
Out[6]:
In [7]:
# Unpack the loaded records: prediction dates (index 1) and predictions (index 2).
times = [x[1] for x in loaded]
preds = [x[2] for x in loaded]
In [8]:
# Show the naive prediction surface on the first day and roughly two months in.
fig, axes = plt.subplots(ncols=2, figsize=(16, 7))
for axis, index in zip(axes, [0, 60]):
    analysis.plot_prediction(loaded, preds[index], axis)
    axis.set_title(times[index])
In [9]:
import datetime
import open_cp.naive
import numpy as np
import pandas as pd
import open_cp.evaluation
In [10]:
# Rebuild each day's naive prediction by hand, so we can compare with the
# predictions saved by the scripted run: for every day in Oct--Dec 2016,
# train a counting-grid kernel on all events strictly before that day.
start = datetime.datetime(2016, 10, 1)
our_preds = []
while start < datetime.datetime(2017, 1, 1):
    predictor = open_cp.naive.CountingGridKernel(loaded.grid.xsize, region=loaded.grid.region())
    earlier = loaded.timed_points.timestamps < start
    predictor.data = loaded.timed_points[earlier]
    prediction = predictor.predict()
    prediction.mask_with(loaded.grid)
    our_preds.append(prediction.renormalise())
    start += datetime.timedelta(days=1)
In [11]:
# Every hand-built prediction should agree exactly with the saved one.
for day in range(len(our_preds)):
    np.testing.assert_allclose(our_preds[day].intensity_matrix, preds[day].intensity_matrix)
In [12]:
# Hit-rate table written by the scripted run; one row per (predictor, day).
frame = pd.read_csv("naive.csv")
frame.head()
Out[12]:
In [13]:
# Last few rows of the hit-rate table.
frame.tail()
Out[13]:
In [14]:
# Score each day's prediction: for coverage levels 1%..100%, the fraction of
# that day's events that fall inside the top-coverage cells of the prediction.
coverages = list(range(1, 101))
rows = []
start = datetime.datetime(2016, 10, 1)
for pred in our_preds:
    end = start + datetime.timedelta(days=1)
    on_day = (loaded.timed_points.timestamps >= start) & (loaded.timed_points.timestamps < end)
    rows.append(open_cp.evaluation.hit_rates(pred, loaded.timed_points[on_day], coverages))
    start = end
In [15]:
# Check the recomputed hit rates against the CSV produced by the scripted run.
# Fix: `frame.ix` was removed in pandas 1.0 and the `np.float` alias was removed
# in numpy 1.24 — use positional `.iloc` indexing and the builtin `float` instead.
# Columns 0-2 are metadata (predictor/date/etc.); columns 3+ are the 1%..100% rates.
for i in range(len(rows)):
    np.testing.assert_allclose(frame.iloc[i, 3:].to_numpy(dtype=float), list(rows[i].values()))
In [16]:
def plot_mean_hitrate(ax, frame, xrange):
    """Plot, per predictor type, the mean hit rate with a standard-error band.

    :param ax: matplotlib axes object to draw onto.
    :param frame: DataFrame with a `Predictor` column and `1%`..`100%` hit-rate
      columns (one row per prediction day).
    :param xrange: iterable of coverage percentages (subset of 1..100) to plot.
    """
    coverages = list(range(1, 101))
    data = {}
    for pred_type in frame.Predictor.unique():
        stats = frame[frame.Predictor == pred_type].describe()
        per_coverage = {}
        for cov in coverages:
            column = stats["{}%".format(cov)]
            # mean and standard error of the mean (std / sqrt(count))
            per_coverage[cov] = column["mean"], (column["std"] / np.sqrt(column["count"]))
        data[pred_type] = per_coverage
    for pred_type in data:
        series = data[pred_type]
        x = np.sort(list(xrange))
        y = np.asarray([series[cov][0] for cov in x])
        dy = np.asarray([series[cov][1] for cov in x])
        ax.plot(x, y, label=pred_type)
        ax.fill_between(x, y - dy, y + dy, alpha=0.5)
    ax.legend()
In [17]:
# Mean hit rate across the full 1%-100% coverage range.
fig, ax = plt.subplots(figsize=(12,8))
plot_mean_hitrate(ax, frame, range(1,101))
ax.set(xlabel="Coverage (%)", ylabel="Hit rate")
# Trailing None suppresses the `ax.set` return value from being displayed.
None
In [18]:
# Zoom into the low-coverage (1%-20%) region, where differences matter most.
fig, ax = plt.subplots(figsize=(12,8))
plot_mean_hitrate(ax, frame, range(1,21))
ax.set(xlabel="Coverage (%)", ylabel="Hit rate")
# Trailing None suppresses the `ax.set` return value from being displayed.
None
Use a beta prior
In [19]:
# Fit Beta distributions to the per-day hit counts saved by the scripted run.
betas = analysis.hit_counts_to_beta("naive_counts.csv")
In [20]:
# Plot the fitted Beta distributions across all coverage levels.
fig, ax = plt.subplots(figsize=(12,8))
analysis.plot_betas(betas, ax)
In [21]:
# Same plot, restricted to the 1%-20% coverage range.
fig, ax = plt.subplots(figsize=(12,8))
analysis.plot_betas(betas, ax, range(1,21))
In [22]:
# Bin timestamps into whole days and count events per day; the cell's value
# is the mean daily event count.
tps = loaded.timed_points.bin_timestamps(datetime.datetime(2016,1,1), datetime.timedelta(days=1))
import collections, statistics
c = collections.Counter(tps.timestamps)
statistics.mean(c.values())
Out[22]:
So we have about 5 crime events a day, on average.
In [23]:
import scipy.special
def BetaBinom(alpha, beta, n, k):
    """Beta-binomial pmf: probability of `k` successes in `n` trials when the
    success probability has a Beta(`alpha`, `beta`) prior.

    Works entirely in log-space: the log binomial coefficient is computed via
    `gammaln` rather than `np.log(scipy.special.comb(n, k))`, which overflows
    to inf for large `n` (``comb`` returns a plain float).

    Adapted from http://www.channelgrubb.com/blog/2015/2/27/beta-binomial-in-python
    """
    # log C(n, k) = lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1)
    log_comb = (scipy.special.gammaln(n + 1) - scipy.special.gammaln(k + 1)
                - scipy.special.gammaln(n - k + 1))
    log_pmf = (log_comb
               + scipy.special.betaln(k + alpha, n - k + beta)
               - scipy.special.betaln(alpha, beta))
    return np.exp(log_pmf)
# For each predictor, plot the beta-binomial probability of capturing k of the
# (roughly) n = 5 daily events, using the fitted Beta prior; the `[5]` entry is
# presumably the 5%-coverage fit — confirm against `hit_counts_to_beta`.
fig, axes = plt.subplots(ncols=len(betas), figsize=(16, 5))
n = 5
for ax, key in zip(axes, betas):
    beta = betas[key][5]
    probs = [BetaBinom(*beta.args, n, k) for k in range(0, n + 1)]
    ax.bar(np.arange(n + 1), probs)
    ax.set(xlabel="Number of crimes captured", ylabel="Probability")
    ax.set_title("{}; {} total events.".format(key, n))
These plots show the probability of capturing $x$ events out of the 5 total events. This puts the difference between the predictors in perspective — it is rather small!
In [ ]: