We examine the sensitivity of the current best model (the gear-specific, multi-window logistic model with is-daylight) to degradation of the AIS data.
We train as normal, then progressively drop out more and more of the test data at random and see how that affects the resulting predictions. We restrict ourselves to the subset of the initial AIS points that had 100-200 AIS points in the previous 24-hour period. This allows us to see the effects of dropout on a consistent set of point bins.
Before this notebook is run, the dropped-out datasets must be created by running (a sketch of what the dropout step does follows the commands):
python scripts/add_all_features.py --keep 0.8
python scripts/add_all_features.py --keep 0.6
python scripts/add_all_features.py --keep 0.4
python scripts/add_all_features.py --keep 0.2
python scripts/add_all_features.py --keep 0.1
python scripts/add_all_features.py --keep 0.05
python scripts/add_all_features.py --keep 0.02
python scripts/add_all_features.py --keep 0.01
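For reference, here is a minimal sketch of the dropout step. The actual implementation lives in scripts/add_all_features.py; the independent-retention assumption and the dropout_points helper below are ours, purely for illustration:

import numpy as np

def dropout_points(points, keep, seed=4321):
    # Hypothetical sketch: retain each AIS point independently with
    # probability `keep`; features are then recomputed on what remains.
    rng = np.random.RandomState(seed)
    return points[rng.uniform(size=len(points)) < keep]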
Step 1 – import required modules and define some helper functions:
In [1]:
%matplotlib inline
from __future__ import print_function, division
import sys
sys.path.append('..')
from vessel_scoring import data, utils
from vessel_scoring.models import train_model_on_data
from vessel_scoring.evaluate_model import evaluate_model, compare_models
from IPython.core.display import display, HTML, Markdown
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
In [2]:
from vessel_scoring.logistic_model import LogisticModel
def make_model(seed=4321):
    return LogisticModel(
        colspec=dict(windows=[1800, 3600, 10800, 21600, 43200, 86400],
                     measures=['measure_daylight', 'measure_speed']),
        order=6, random_state=seed)
In [3]:
def load_data(suffix='', seed=4321, even_split=True):
    """Load train / test data.

    This loads the datasets as they are normally used in training / testing.
    """
    # Data supplied by Kristina
    _, train_lline, valid_lline, test_lline = data.load_dataset_by_vessel(
        '../datasets/kristina_longliner.measures{}.npz'.format(suffix),
        even_split=even_split, seed=seed)
    _, train_trawl, valid_trawl, test_trawl = data.load_dataset_by_vessel(
        '../datasets/kristina_trawl.measures{}.npz'.format(suffix),
        even_split=even_split, seed=seed)
    _, train_pseine, valid_pseine, test_pseine = data.load_dataset_by_vessel(
        '../datasets/kristina_ps.measures{}.npz'.format(suffix),
        even_split=even_split, seed=seed)

    # Slow transits (used to train models to avoid classifying slow
    # transits as fishing).
    TRANSIT_WEIGHT = 10
    x_tran, xtrain_tran, xcross_tran, xtest_tran = data.load_dataset_by_vessel(
        '../datasets/slow-transits.measures{}.npz'.format(suffix),
        even_split=False, seed=seed)
    xtrain_tran = utils.clone_subset(xtrain_tran, test_lline.dtype)
    xcross_tran = utils.clone_subset(xcross_tran, test_lline.dtype)
    xtest_tran = utils.clone_subset(xtest_tran, test_lline.dtype)
    train_tran = np.concatenate([xtrain_tran, xcross_tran] * TRANSIT_WEIGHT)

    train = {'longliner': np.concatenate([train_lline, valid_lline, train_tran]),
             'trawler': np.concatenate([train_trawl, valid_trawl, train_tran]),
             'purse_seine': np.concatenate([train_pseine, valid_pseine, train_tran])}
    test = {'longliner': test_lline,
            'trawler': test_trawl,
            'purse_seine': test_pseine}
    return train, test
In [4]:
def _load_test_data(path, mmsi, even_split=True):
    test_data, _, _, _ = data.load_dataset_by_vessel(path, even_split=even_split)
    mask = np.zeros(test_data.shape, dtype='bool')
    for m in mmsi:
        mask |= (test_data['mmsi'] == m)
    return test_data[mask]

def load_test_data(suffix, mmsi_map, even_split=True):
    # Data supplied by Kristina
    test_lline = _load_test_data(
        '../datasets/kristina_longliner.measures{}.npz'.format(suffix),
        mmsi_map['longliner'], even_split=even_split)
    test_trawl = _load_test_data(
        '../datasets/kristina_trawl.measures{}.npz'.format(suffix),
        mmsi_map['trawler'], even_split=even_split)
    test_pseine = _load_test_data(
        '../datasets/kristina_ps.measures{}.npz'.format(suffix),
        mmsi_map['purse_seine'], even_split=even_split)
    return {'longliner': test_lline,
            'trawler': test_trawl,
            'purse_seine': test_pseine}
In [5]:
def _fishing_hours_per_mmsi(data, preds, mask, max_gap):
    # Assumes a single vessel (mmsi).
    indices = np.argsort(data['timestamp'])
    data = data[indices]
    preds = preds[indices]
    thresholded = (preds > 0.5).astype(float)
    dt = (data['timestamp'][1:] - data['timestamp'][:-1]) / (60 * 60)
    dt = np.minimum(dt, max_gap)
    # Trapezoidal estimates: weight each interval by the average of the
    # values at its two endpoints.
    avg_pred = 0.5 * (preds[1:] + preds[:-1])
    avg_thresh = 0.5 * (thresholded[1:] + thresholded[:-1])
    avg_nominal = 0.5 * (data['classification'][1:] + data['classification'][:-1])
    if mask is not None:
        # Reorder the mask along with the data, and cast to float so that
        # endpoint averaging yields 0, 0.5, or 1 rather than a boolean OR.
        mask = mask[indices].astype(float)
        dt *= 0.5 * (mask[1:] + mask[:-1])
    return (avg_pred * dt).sum(), (avg_thresh * dt).sum(), (avg_nominal * dt).sum()

def fishing_hours(data, preds, mask=None, max_gap=24):
    mmsi = set(data['mmsi'])
    pred_hours = thresh_hours = nominal_hours = 0
    for m in mmsi:
        mmsi_mask = (data['mmsi'] == m)
        p_hours, t_hours, n_hours = _fishing_hours_per_mmsi(
            data[mmsi_mask], preds[mmsi_mask],
            mask=None if mask is None else mask[mmsi_mask],
            max_gap=max_gap)
        pred_hours += p_hours
        thresh_hours += t_hours
        nominal_hours += n_hours
    return pred_hours, thresh_hours, nominal_hours
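As a quick sanity check of the trapezoidal estimate, here is fishing_hours on a synthetic toy track (hypothetical records, not drawn from the real datasets): one vessel, three points an hour apart, fishing for the first hour and half of the second, so all three estimates should come out to 1.5 hours.

toy = np.zeros(3, dtype=[('mmsi', 'i8'), ('timestamp', 'f8'),
                         ('classification', 'f8')])
toy['mmsi'] = 1
toy['timestamp'] = [0, 3600, 7200]       # one-hour spacing, in seconds
toy['classification'] = [1.0, 1.0, 0.0]
toy_preds = np.array([1.0, 1.0, 0.0])
print(fishing_hours(toy, toy_preds))     # expect (1.5, 1.5, 1.5)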
We look at the effect of the various dropout levels on the points that originally had 100-200 PPD (points per day). This lets us state that the approximate PPD of the dropped-out points is $100\alpha$-$200\alpha$, where $\alpha$ is the fraction of points retained.
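For concreteness, the nominal PPD bin edges at each dropout level can be computed directly (these match the PPD-low / PPD-high columns in the tables produced below):

for alpha in [1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.02, 0.01]:
    # e.g. alpha = 0.1 maps the 100-200 PPD bin to roughly 10-20 PPD
    print('alpha = {:<4}: {:>3.0f}-{:>3.0f} PPD'.format(alpha, 100 * alpha, 200 * alpha))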
In [10]:
EVEN_SPLIT = True
keep_probs = [1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.02, 0.01]

# Load the full (non-dropped-out) data. `train_data` is used for training
# while `test_data` is used for the alpha = 1 case.
train_data, test_data = load_data(even_split=EVEN_SPLIT)
mmsi_map = {x: set(test_data[x]['mmsi'])
            for x in ['purse_seine', 'trawler', 'longliner']}

# Load dropped-out data for all alphas in `keep_probs` other than 1.
drop_test = {1: load_test_data('', mmsi_map)}
for kp in keep_probs[1:]:
    suffix = '-{}'.format(str(kp).replace('.', ''))
    drop_test[kp] = load_test_data(suffix=suffix, mmsi_map=mmsi_map,
                                   even_split=EVEN_SPLIT)

predictions = {}
for gear in ['purse_seine', 'trawler', 'longliner']:
    mdl = make_model()
    train_model_on_data(mdl, train_data[gear])
    # Find points in the full test data with between 100 and 200 PPD.
    full_point_set = drop_test[1][gear]
    fps_counts = full_point_set['measure_count_86400']
    test_points = full_point_set[(fps_counts >= 100) & (fps_counts <= 200)]
    display(HTML("<h2>{}</h2>".format(gear.replace('_', ' ').title())))
    predictions[gear] = []
    for kp in keep_probs:
        raw_td = drop_test[kp][gear]
        # Map (mmsi, timestamp) -> index into the dropped-out data.
        tdmap = {(x['mmsi'], x['timestamp']): i for (i, x) in enumerate(raw_td)}
        tdlist = []
        for x in test_points:
            key = (x['mmsi'], x['timestamp'])
            if key in tdmap:
                tdlist.append(raw_td[tdmap[key]])
        td = np.array(tdlist)
        raw = mdl.predict_proba(td)[:, 1]
        predictions[gear].append((kp, td, raw, (raw > 0.5),
                                  td['classification'] > 0.5))
    lines = ["|PPD-low|PPD-high|Recall|Precision|F1-Score|ROC-AUC|",
             "|-----|----|------|---------|--------|-------|"]
    for kp, td, raw, pred, actual in predictions[gear]:
        fpr, tpr, thresholds = metrics.roc_curve(actual, raw)
        auc = metrics.auc(fpr, tpr)
        lines.append("|{}|{}|{:.2f}|{:.2f}|{:.2f}|{:.2f}|".format(
            int(100 * kp), int(200 * kp),
            metrics.recall_score(actual, pred),
            metrics.precision_score(actual, pred),
            metrics.f1_score(actual, pred),
            auc))
    display(Markdown('\n'.join(lines)))
    display(HTML("<hr/>"))
In [7]:
for gear in ['purse_seine', 'trawler', 'longliner']:
    display(HTML("<h2>{}</h2>".format(gear.replace('_', ' ').title())))
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(12, 8))
    ppd = []
    prec = []
    recall = []
    pred_hours = []
    thresh_hours = []
    nom_hours = []
    for kp, td, raw, pred, actual in predictions[gear]:
        ppd.append(150 * kp)
        recall.append(metrics.recall_score(actual, pred))
        prec.append(metrics.precision_score(actual, pred))
        p_hours, t_hours, n_hours = fishing_hours(td, raw)
        pred_hours.append(p_hours)
        thresh_hours.append(t_hours)
        nom_hours.append(n_hours)
    ax1.plot(ppd, prec)
    ax1.set_ylim(0, 1)
    ax1.set_ylabel("precision")
    ax2.plot(ppd, recall)
    ax2.set_ylim(0, 1)
    ax2.set_ylabel("recall")
    ax3.plot(ppd, pred_hours, label="predicted")
    ax3.plot(ppd, thresh_hours, label="threshold")
    ax3.plot(ppd, nom_hours, label="nominal")
    ax3.set_xlabel("points per day")
    ax3.set_ylabel("fishing hours")
    ax3.set_ylim(0, None)
    ax3.legend(loc=4)
    ax4.plot(ppd, np.array(pred_hours, dtype=float) / nom_hours, label="predicted")
    ax4.plot(ppd, np.array(thresh_hours, dtype=float) / nom_hours, label="threshold")
    ax4.set_xlabel("points per day")
    ax4.set_ylabel("fishing fraction")
    ax4.set_ylim(0, None)
    ax4.legend(loc=4)
    plt.show()
As a consistency check, we repeat the above exercise, but instead of starting with 100-200 PPD and downsampling, we choose the corresponding PPD bins in the original data and reproduce the plots above.
As the plots below show, precision, recall, and fishing fraction are consistent with the analysis above except where we run out of points. Fishing hours are not, but that is expected since we are looking at different parts of the data set and there is no particular reason for the fishing hours to match.
In [11]:
for gear, ylim in [('purse_seine', 20), ('trawler', 2), ('longliner', 2)]:
    display(HTML("<h2>{}</h2>".format(gear.replace('_', ' ').title())))
    bins = [(100 * alpha, 200 * alpha) for alpha in keep_probs]
    mdl = make_model()
    train_model_on_data(mdl, train_data[gear])
    td = test_data[gear]
    raw = mdl.predict_proba(td)[:, 1]

    def bin_data(raw, td):
        bin_centers = []
        recall = []
        prec = []
        predicted_fp = []
        nominal_fp = []
        threshold_fp = []
        counts = td['measure_count_86400']
        for start, stop in bins:
            mask = (counts >= start) & (counts < stop)
            bin_centers.append(0.5 * (start + stop))
            # `fishing_hours` handles the per-vessel split; the raw
            # `_fishing_hours_per_mmsi` assumes a single vessel.
            predicted, threshold, nominal = fishing_hours(
                td, raw, mask=mask, max_gap=24)
            predicted_fp.append(predicted)
            nominal_fp.append(nominal)
            threshold_fp.append(threshold)
            actual = td['classification'][mask] > 0.5
            recall.append(metrics.recall_score(actual, raw[mask] > 0.5))
            prec.append(metrics.precision_score(actual, raw[mask] > 0.5))
        return (recall, prec, bin_centers, np.array(predicted_fp),
                np.array(nominal_fp), np.array(threshold_fp))

    recall, prec, ppd, pred_hours, nom_hours, thresh_hours = bin_data(raw, td)

    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(12, 8))
    ax1.plot(ppd, prec)
    ax1.set_ylim(0, 1)
    ax1.set_ylabel("precision")
    ax2.plot(ppd, recall)
    ax2.set_ylim(0, 1)
    ax2.set_ylabel("recall")
    ax3.plot(ppd, pred_hours, label="predicted")
    ax3.plot(ppd, thresh_hours, label="threshold")
    ax3.plot(ppd, nom_hours, label="nominal")
    ax3.set_xlabel("points per day")
    ax3.set_ylabel("fishing hours")
    ax3.set_ylim(0, None)
    ax3.legend(loc=4)
    # The +1 in the denominator guards against division by zero in
    # (nearly) empty bins.
    ax4.plot(ppd, np.array(pred_hours, dtype=float) / (nom_hours + 1), label="predicted")
    ax4.plot(ppd, np.array(thresh_hours, dtype=float) / (nom_hours + 1), label="threshold")
    ax4.set_xlabel("points per day")
    ax4.set_ylabel("fishing fraction")
    ax4.set_ylim(0, None)
    ax4.legend(loc=4)
    plt.show()