Let's make sure we can roughly reproduce the results from the old mwp package.



In [1]:

    
from itertools import islice

from bubbly.model import Model
from bubbly.dr1 import LocationGenerator
from bubbly.extractors import RingWaveletCompressionExtractor
from bubbly.util import summary









    



/Users/beaumont/Library/Python/2.7/lib/python/site-packages/scikits/__init__.py:1: UserWarning: Module argparse was already imported from /opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/argparse.pyc, but /Users/beaumont/Library/Python/2.7/lib/python/site-packages is being added to sys.path
  __import__('pkg_resources').declare_namespace(__name__)



In [2]:

    
# Build the model from a feature extractor and a location generator.
# max_layers=1 in cascade_params limits the initial cascade to one layer;
# further layers are added by hand in the bootstrap sections below.
model = Model(
    RingWaveletCompressionExtractor(),
    LocationGenerator(),
    weak_learner_params={
        'verbose': 1,
        'max_depth': 1,
        'n_estimators': 200,
        'subsample': .4,
    },
    cascade_params={'verbose': 1, 'max_layers': 1},
)

First round of fitting



In [3]:

    
# Initial training pass. The model was built with max_layers=1, so only a
# single cascade round is trained here (hence the "Could not reduce false
# positive enough after 1 layers" warning in the output).
model.fit()









    



WARNING: RuntimeWarning: invalid value encountered in divide [bubbly.util]
WARNING: RuntimeWarning: invalid value encountered in divide [bubbly.util]
WARNING: Non-finite values in feature vectors. Fixing [bubbly.model]
........................................................................................................................................................................................................Cascade round 1. False pos rate: 7.853982e-02. Recall: 9.900442e-01
WARNING: Could not reduce false positive enough after 1 layers. False positive rate: 7.853982e-02. Recall: 9.900442e-01 [bubbly.cascade]






    



No handlers could be found for logger "bubbly.model"



In [4]:

    
# Training-set performance of the first-round model: rebuild the (x, y) pair
# from the 'pos' and 'neg' entries of training_data[0] and print the summary
# (false-positive rate, recall, accuracy).
# NOTE(review): _make_xy is a private Model method (leading underscore);
# consider exposing a public equivalent.
x, y = model._make_xy(model.training_data[0]['pos'], model.training_data[0]['neg'])
summary(model.estimator, x, y)









    



False Positive: 0.079
Recall:         0.989
Accuracy:       0.938



In [43]:

    
# Build a second evaluation set (labelled 'CV' in the plots further down):
# all positives plus the first 10000 negatives from a separate
# LocationGenerator. x2/y2 are reused by every later CV summary and curve.
# NOTE(review): LocationGenerator(1) presumably selects a different subset
# than the default generator used for training -- confirm against bubbly.dr1.
# NOTE(review): this cell's execution count (In [43]) is later than the
# bootstrap cells below, so the stored output probably describes a later model
# state than the "First round of fitting" heading suggests.
cv_locator = LocationGenerator(1)
on2 = cv_locator.positives()
off2 = list(islice(cv_locator.negatives_iterator(), 10000))
x2, y2 = model._make_xy(on2, off2)
summary(model.estimator, x2, y2)









    



WARNING: RuntimeWarning: invalid value encountered in divide [bubbly.util]
False Positive: 0.037
Recall:         0.923
Accuracy:       0.960

First Bootstrap



In [15]:

    
# Collect false positives from the current model; they are used as the
# negative examples for the next layer (off3 is passed to add_layer below).
# NOTE(review): 2000 is presumably the number of examples requested -- confirm.
off3 = model.cloud_false_positives(2000)



In [16]:

    
# Add a layer built from the original positives and the false positives
# gathered in off3.
model.add_layer(model.training_data[0]['pos'], off3)









    



........................................................................................................................................................................................................



In [17]:

    
# Re-score the CV set (x2, y2) after adding the first bootstrap layer.
summary(model.estimator, x2, y2)









    



False Positive: 0.045
Recall:         0.944
Accuracy:       0.946



In [20]:

    
# NOTE(review): this repeats the earlier add_layer call with the same off3
# negatives -- confirm that adding a second layer from identical data is
# intentional and not a leftover re-run.
model.add_layer(model.training_data[0]['pos'], off3)









    



........................................................................................................................................................................................................



In [44]:

    
# Re-score the CV set (x2, y2).
# NOTE(review): executed out of order (In [44]); the stored output is identical
# to that of In [43] and In [45], so it likely does not reflect the model state
# right after the layer added above. Re-run top to bottom to confirm.
summary(model.estimator, x2, y2)









    



False Positive: 0.037
Recall:         0.923
Accuracy:       0.960

Second Bootstrap



In [24]:

    
# Second bootstrap: collect a fresh batch of false positives from the current
# model to use as negatives for the next layer (passed to add_layer below).
off4 = model.cloud_false_positives(2000)



In [25]:

    
# Add a layer built from the original positives and the off4 false positives.
model.add_layer(model.training_data[0]['pos'], off4)









    



........................................................................................................................................................................................................



In [117]:

    
# Build an (x4, y4) pair from the original positives and the second-bootstrap
# negatives (off4), so the model can be scored on those examples specifically.
# NOTE(review): uses the private Model._make_xy helper.
x4, y4 = model._make_xy(model.training_data[0]['pos'], off4)



In [45]:

    
# Re-score the CV set (x2, y2) after the second bootstrap layer.
summary(model.estimator, x2, y2)









    



False Positive: 0.037
Recall:         0.923
Accuracy:       0.960



In [27]:

    
# Re-score the original training set (x, y) after the second bootstrap layer.
summary(model.estimator, x, y)









    



False Positive: 0.030
Recall:         0.978
Accuracy:       0.972



In [118]:

    
# Score the model on the original positives plus the off4 negatives.
# NOTE(review): the stored output was produced at In [118], after the later
# cells of this notebook had already run, so it may describe a later model
# state than this section implies.
summary(model.estimator, x4, y4)









    



False Positive: 0.586
Recall:         0.971
Accuracy:       0.587



In [50]:

    
from bubbly.util import rfp_curve
import brewer2mpl
import matplotlib.pyplot as plt

# Seven-entry sequential 'Purples' palette, in reversed order.
colors = brewer2mpl.get_map('Purples', 'sequential', 7).mpl_colors[::-1]

# One curve per stage of the estimator, evaluated on the CV set (x2, y2).
# The palette only has len(colors) entries, so clamp the index: without this,
# colors[i] raises IndexError once the estimator has more stages than colors.
for i, y2p in enumerate(model.estimator.staged_decision_function(x2)):
    rfp_curve(y2p, y2, label='CV %i' % i,
              color=colors[min(i, len(colors) - 1)])

# Reference curve for the full estimator on the original training set.
yp = model.estimator.decision_function(x)
rfp_curve(yp, y, color='red', label='Training Data')

plt.ylim(0, .05)
# Trailing semicolon keeps the Legend repr out of the cell output.
plt.legend(loc='upper left');









    Out[50]:





<matplotlib.legend.Legend at 0x11b925d10>

Third Bootstrap



In [52]:

    
# Third bootstrap: collect another batch of false positives from the current
# model to use as negatives for the next layer (passed to add_layer below).
off5 = model.cloud_false_positives(2000)



In [53]:

    
# Add a layer built from the original positives and the off5 false positives.
model.add_layer(model.training_data[0]['pos'], off5)









    



........................................................................................................................................................................................................



In [54]:

    
# Re-score the CV set (x2, y2) after the third bootstrap layer.
summary(model.estimator, x2, y2)









    



False Positive: 0.032
Recall:         0.915
Accuracy:       0.964



In [55]:

    
# Re-score the original training set (x, y) after the third bootstrap layer.
summary(model.estimator, x, y)









    



False Positive: 0.026
Recall:         0.971
Accuracy:       0.973



In [58]:

    
# Same per-stage plot as in the second-bootstrap section, now including the
# third bootstrap layer. Relies on `colors`, `rfp_curve` and `plt` from the
# earlier plotting cell.
# The palette has a fixed number of entries, so clamp the index: without this,
# colors[i] raises IndexError once the estimator has more stages than colors.
for i, y2p in enumerate(model.estimator.staged_decision_function(x2)):
    rfp_curve(y2p, y2, label='CV %i' % i,
              color=colors[min(i, len(colors) - 1)])

# Reference curve for the full estimator on the original training set.
yp = model.estimator.decision_function(x)
rfp_curve(yp, y, color='red', label='Training Data')

plt.ylim(0, .01)
# Trailing semicolon keeps the Legend repr out of the cell output.
plt.legend(loc='upper left');









    Out[58]:





<matplotlib.legend.Legend at 0x11e0055d0>



In [136]:

    
# Persist the accumulated training data as JSON. The path is relative to the
# notebook's working directory. Only model.training_data is written here, not
# the fitted estimator.
# NOTE(review): assumes model.training_data is JSON-serializable and that
# ../models already exists -- confirm.
from json import dump
with open('../models/reproducing_old_training_data.json', 'w') as outfile:
    dump(model.training_data, outfile)



In [ ]: