In [1]:
from sklearn.base import BaseEstimator, ClassifierMixin
import subprocess
import numpy as np

In [4]:
class BayesPointMachine(BaseEstimator, ClassifierMixin):  
    """An example of classifier"""

    def __init__(self, iterations=30, batches=1, compute_evidence=False,
                 train_file="train.txt",
                 test_file="test.txt",
                 prediction_file="predictions.txt",
                 model_file="trained-binary-bpm.bin",
                 multiclass=False):
        """
        Called when initializing the classifier
        """
        self.iterations = iterations
        self.batches = batches
        self.compute_evidence = compute_evidence
        self.train_file = train_file
        self.test_file = test_file
        self.prediction_file = prediction_file
        self.model_file = model_file
        self.multiclass = multiclass
        self.trained = False
        self.model_name = "BinaryBayesPointMachine" if not self.multiclass else "MulticlassBayesPointMachine"
        self._classes = set()
        
    def fit(self, X, y=None):
        """
        This should fit classifier. All the "work" should be done here.
        """
        
        self._classes = set(y)
        
        # First create the input file for Infer.NET
        self._create_input_file(self.train_file, X, y)
        
        # Then call the command line runner
        cmd = ["mono", "Learner.exe", "Classifier", self.model_name, "Train",
               "--training-set", self.train_file, 
               "--model", self.model_file]
        self._execute(cmd)
        
        self.trained = True
        
        return self

    def predict_proba(self, X, y=None):
        if not self.trained:
            raise RuntimeError("You must train classifer before predicting data!")
        
        # First create the input file for Infer.NET
        self._create_input_file(self.test_file, X)
        
        # Then call the command line runner
        cmd = ["mono", "Learner.exe", "Classifier", self.model_name, "Predict",
               "--test-set", self.test_file, 
               "--model", self.model_file,
               "--predictions", self.prediction_file]
        self._execute(cmd)
        
        # Now load the predictions back in
        preds = list(self._get_predictions())
        return np.array(preds)
        
    def predict(self, X, y=None):
        return([np.argmax(x) for x in self.predict_proba(X, y)])    

    def _execute(self, cmd):
        p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        for line in iter(p.stdout.readline, ''): 
            print line
        retval = p.wait()
    
    def _create_input_file(self, filename, X, y=None):
        if y is None:
            self._create_input_file(filename, X, [0 for i in range(len(X))])
            return
        with open(filename, 'w') as f:
            for x_i, y_i in zip(X, y):
                if hasattr(x_i, '__iter__'):
                    f.write('%d ' % y_i + ['%d:%d' % (j, x_ij) for j, x_ij in enumerate(x_i)] + '\n')
                else:
                    f.write('%d 0:%d\n' % (y_i, x_i))

    def _get_predictions(self):
        with open(self.prediction_file) as f:
            # 1=0.49321002813527 0=0.50678997186473
            for line in f:
                preds = np.empty((len(self._classes),))
                for pred in line.split(' '):
                    y, prob = pred.split('=')
                    preds[y] = prob
                yield preds

In [5]:
from sklearn.grid_search import GridSearchCV

X_train = [i for i in range(0, 100, 5)]  
X_test = [i + 3 for i in range(-5, 95, 5)]  

bpm = BayesPointMachine()

# for some reason I have to pass y with same shape
# otherwise gridsearch throws an error. Not sure why.
bpm.fit(X_test, y=[i / 10 for i in range(20)])

bpm.predict(X_test)


Data set contains 20 instances, 2 classes and 1 features.

Data set contains 20 instances, 1 classes and 1 features.

/usr/local/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:90: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
Out[5]:
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [ ]: