Let's make sure we can reproduce the ballpark results from the old mwp package.
In [1]:
from itertools import islice
from bubbly.model import Model
from bubbly.dr1 import LocationGenerator
from bubbly.extractors import RingWaveletCompressionExtractor
from bubbly.util import summary
In [2]:
# Construct the model from a ring/wavelet feature extractor and a
# default-constructed location generator.
# NOTE(review): weak_learner_params / cascade_params are presumably forwarded
# to the boosted weak learner and the cascade wrapper respectively -- confirm
# against bubbly.model.Model.
model = Model(
    RingWaveletCompressionExtractor(),
    LocationGenerator(),
    weak_learner_params={
        'verbose': 1,
        'max_depth': 1,
        'n_estimators': 200,
        'subsample': 0.4,
    },
    cascade_params={'verbose': 1, 'max_layers': 1},
)
In [3]:
# Fit the model constructed above (no arguments are passed).
model.fit()
In [4]:
# Build an (x, y) pair from the 'pos' and 'neg' entries of the first
# training_data record, then summarize the fitted estimator on it.
# NOTE(review): presumably x is the feature matrix and y the labels --
# confirm against Model._make_xy.
x, y = model._make_xy(model.training_data[0]['pos'], model.training_data[0]['neg'])
summary(model.estimator, x, y)
In [43]:
# Assemble a second evaluation set (x2, y2) from a separate LocationGenerator
# and summarize the estimator on it.
# NOTE(review): the argument 1 presumably selects a held-out /
# cross-validation subset (later plots label these curves 'CV') -- confirm in
# bubbly.dr1.LocationGenerator.
cv_locator = LocationGenerator(1)
on2 = cv_locator.positives()
# Keep only the first 10000 items produced by the negatives iterator.
off2 = list(islice(cv_locator.negatives_iterator(), 10000))
x2, y2 = model._make_xy(on2, off2)
summary(model.estimator, x2, y2)
In [15]:
# Ask the model for a sample (argument 2000) via cloud_false_positives.
# NOTE(review): presumably 2000 false positives of the current model, computed
# remotely -- confirm against Model.cloud_false_positives.
off3 = model.cloud_false_positives(2000)
In [16]:
# Add a layer to the model using the first training record's positives and
# the off3 sample.
model.add_layer(model.training_data[0]['pos'], off3)
In [17]:
# Summarize the updated estimator on the (x2, y2) evaluation set.
summary(model.estimator, x2, y2)
In [20]:
# Add another layer.
# NOTE(review): this passes off3 again, the same sample already given to the
# preceding add_layer call -- confirm the reuse is intended.
model.add_layer(model.training_data[0]['pos'], off3)
In [44]:
# Summarize the estimator on the (x2, y2) evaluation set again.
summary(model.estimator, x2, y2)
In [24]:
# Draw a fresh sample (argument 2000) via cloud_false_positives from the
# model as it now stands.
# NOTE(review): presumably false positives of the current cascade -- confirm.
off4 = model.cloud_false_positives(2000)
In [25]:
# Add a layer using the first training record's positives and the off4 sample.
model.add_layer(model.training_data[0]['pos'], off4)
In [117]:
# Build an (x4, y4) pair from the same positives and the off4 sample, so the
# estimator can be summarized on the data passed to the off4 add_layer call.
x4, y4 = model._make_xy(model.training_data[0]['pos'], off4)
In [45]:
# Summarize the estimator on the (x2, y2) evaluation set.
summary(model.estimator, x2, y2)
In [27]:
# Summarize the estimator on the (x, y) set built from training_data[0].
summary(model.estimator, x, y)
In [118]:
# Summarize the estimator on the (x4, y4) set built from the off4 sample.
summary(model.estimator, x4, y4)
In [50]:
from bubbly.util import rfp_curve
import brewer2mpl
import matplotlib.pyplot as plt

# 7-entry sequential 'Purples' palette with its order reversed ([::-1]).
# colors is indexed by stage below, so this assumes at most 7 stages.
colors = brewer2mpl.get_map('Purples', 'sequential', 7).mpl_colors[::-1]

# One curve per stage of the estimator, evaluated on the (x2, y2) set.
# NOTE(review): rfp_curve presumably plots a false-positive-rate curve from
# decision scores and true labels -- confirm against bubbly.util.rfp_curve.
for i, y2p in enumerate(model.estimator.staged_decision_function(x2)):
    rfp_curve(y2p, y2, label='CV %i' % i, color=colors[i])

# Single reference curve for the full estimator on the (x, y) training set;
# drawn once, outside the loop.
yp = model.estimator.decision_function(x)
rfp_curve(yp, y, color='red', label='Training Data')
plt.ylim(0, .05)
plt.legend(loc='upper left')
Out[50]:
In [52]:
# Draw another sample (argument 2000) via cloud_false_positives from the
# current model.
# NOTE(review): presumably false positives of the current cascade -- confirm.
off5 = model.cloud_false_positives(2000)
In [53]:
# Add a layer using the first training record's positives and the off5 sample.
model.add_layer(model.training_data[0]['pos'], off5)
In [54]:
# Summarize the estimator on the (x2, y2) evaluation set.
summary(model.estimator, x2, y2)
In [55]:
# Summarize the estimator on the (x, y) set built from training_data[0].
summary(model.estimator, x, y)
In [58]:
# Re-draw the per-stage curves on the (x2, y2) set with a tighter y-range
# (0 to .01). Relies on colors, rfp_curve and plt defined in an earlier cell;
# colors is indexed by stage, so this assumes it has an entry for every stage.
for i, y2p in enumerate(model.estimator.staged_decision_function(x2)):
    rfp_curve(y2p, y2, label='CV %i' % i, color=colors[i])

# Single reference curve for the full estimator on the (x, y) training set;
# drawn once, outside the loop.
yp = model.estimator.decision_function(x)
rfp_curve(yp, y, color='red', label='Training Data')
plt.ylim(0, .01)
plt.legend(loc='upper left')
Out[58]:
In [136]:
from json import dump

# Persist model.training_data as JSON; the with-block closes the file even if
# dump() raises.
# NOTE(review): assumes training_data holds only JSON-serializable values --
# confirm.
with open('../models/reproducing_old_training_data.json', 'w') as outfile:
    dump(model.training_data, outfile)
In [ ]: