In [1]:
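# Imports for building and evaluating a bubbly cascade model; stream the
# library's log output to the console and enable pylab plotting.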
from itertools import islice
import logging
import json
from bubbly.model import Model
from bubbly.dr1 import LocationGenerator, highest_quality_on_params
from bubbly.extractors import RingWaveletCompressionExtractor, enhance_contrast
from bubbly.util import summary, rfp_curve
import brewer2mpl
logging.getLogger('bubbly').addHandler(logging.StreamHandler())
%pylab
In [2]:
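# Build the feature extractor, a location generator whose positives come from
# highest_quality_on_params, and a one-layer cascade of weak learners
# (max_depth=1 stumps, 200 estimators, 40% subsampling).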
ex = RingWaveletCompressionExtractor()
l = LocationGenerator()
l.positive_generator = highest_quality_on_params
model = Model(ex, l,
              weak_learner_params=dict(verbose=1, max_depth=1, n_estimators=200, subsample=.4),
              cascade_params=dict(verbose=1, max_layers=1))
In [3]:
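# Train the first cascade layer; with no arguments, fit() presumably draws its
# positive and negative examples from the locator configured above.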
model.fit()
In [4]:
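# Re-score the model on its own training examples (optimistic by construction).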
x, y = model._make_xy(model.training_data[0]['pos'], model.training_data[0]['neg'])
summary(model.estimator, x, y)
In [5]:
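# Assemble a held-out cross-validation set: positives from a second
# LocationGenerator (a different region, presumably) and 10,000 random negatives.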
cv_locator = LocationGenerator(1)
cv_locator.positive_generator = highest_quality_on_params
on2 = cv_locator.positives()
off2 = list(islice(cv_locator.negatives_iterator(), 10000))
x2, y2 = model._make_xy(on2, off2)
summary(model.estimator, x2, y2)
In [6]:
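# Plot rfp curves (recall vs. false-positive rate, presumably) for each cascade
# stage on the CV set, alongside the optimistic training-data curve.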
colors = brewer2mpl.get_map('Purples', 'sequential', 7).mpl_colors[::-1]
for i, y2p in enumerate(model.estimator.staged_decision_function(x2)):
    rfp_curve(y2p, y2, label='CV %i' % i, color=colors[i])
yp = model.estimator.decision_function(x)
rfp_curve(yp, y, color='red', label='Training Data')
plt.ylim(0, .01)
plt.legend(loc='upper left')
Out[6]:
In [7]:
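# Mine hard negatives: collect false positives of the current model
# (cloud_false_positives presumably farms the search out to 50 cloud workers).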
off3 = model.cloud_false_positives(1000, workers=50)
on3 = model.training_data[0]['pos']
In [8]:
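# Add a second cascade layer, trained on the original positives and the mined false positives.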
model.add_layer(on3, off3)
In [19]:
colors = ['c', 'm']
yp = model.estimator.decision_function(x)
rfp_curve(yp, y, color='red', label='Training Data')
for i, y2p in enumerate(model.estimator.staged_decision_function(x2)):
    rfp_curve(y2p, y2, label='CV %i' % i, color=colors[i])
plt.ylim(0, .01)
plt.legend(loc='upper left')
Out[19]:
In [17]:
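# Undo the layer just added by popping its bias and estimator, then retrain it
# on a truncated negative sample.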
model.estimator.bias_.pop()
model.estimator.estimators_.pop()
model.add_layer(on3, off3[:462])
In [28]:
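# Retrain from scratch: a fresh one-layer model fit on the original positives
# and an equal-sized random sample drawn from all negatives gathered so far.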
import random
model2 = Model(ex, l,
               weak_learner_params=dict(verbose=1, max_depth=1, n_estimators=200, subsample=.4),
               cascade_params=dict(verbose=1, max_layers=1))
off_all = off3 + model.training_data[0]['neg']
model2.fit(on3, random.sample(off_all, len(on3)))
In [32]:
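# Add a second layer to model2, again using a balanced random negative sample.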
model2.add_layer(on3, random.sample(off_all, len(on3)))
In [40]:
colors = ['c', 'm', 'b']
yp = model2.estimator.decision_function(x)
rfp_curve(yp, y, color='red', label='Training Data')
for i, y2p in enumerate(model2.estimator.staged_decision_function(x2)):
    rfp_curve(y2p, y2, label='CV %i' % i, color=colors[i])
plt.ylim(0, .002)
plt.legend(loc='upper left')
Out[40]:
In [38]:
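# Persist the training and cross-validation example locations to a benchmark JSON file.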
data = {'pos': on3, 'neg': off_all, 'cv_pos': on2, 'cv_neg': off2}
with open('../models/benchmark_training_data.json', 'w') as outfile:
    json.dump(data, outfile)