In [1]:
%pylab inline
In [2]:
import pandas
import root_numpy
In [3]:
train_features = ['Bplus_ENDVERTEX_Y', 'Bplus_P', 'Bplus_PT']
uniform_features = ['KS0_TAU']
all_features = train_features + uniform_features
In [4]:
def read_data(filename):
    # read the first 200000 entries of the needed branches from the DecayTree tree
    df = root_numpy.root2array(filename, treename='DecayTree', branches=all_features, stop=200000)
    return pandas.DataFrame(df)
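If root_numpy is not available, the same branches can be read with uproot. This is only a sketch under that assumption (the original notebook relies on root_numpy); the tree name, branch list and entry limit are taken from the cell above:

import uproot
def read_data_uproot(filename):
    # same branches and entry limit as read_data above, returned as a pandas DataFrame
    tree = uproot.open(filename)['DecayTree']
    return tree.arrays(all_features, entry_stop=200000, library='pd')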
In [5]:
bck = read_data('/moosefs/notebook/datasets/inflation/highSb_data-LL_strict_BDT_96Up.root')
sig = read_data('/moosefs/notebook/datasets/inflation/12113095-LL-Official.root')
print len(sig), len(bck)
In [6]:
data = pandas.concat([sig, bck])
answers = numpy.concatenate([numpy.ones(len(sig)), numpy.zeros(len(bck))])
In [7]:
from hep_ml.commonutils import train_test_split
trainX, testX, trainY, testY = train_test_split(data, answers, train_size=0.51)
In [8]:
from sklearn.ensemble import GradientBoostingClassifier
from hep_ml.ugradientboosting import uGradientBoostingClassifier, KnnFlatnessLossFunction
In [9]:
from rep.metaml import ClassifiersFactory
from rep.estimators import SklearnClassifier
classifiers = ClassifiersFactory()
gb = GradientBoostingClassifier(n_estimators=400, max_depth=4, min_samples_leaf=100, learning_rate=0.05)
classifiers['GB'] = SklearnClassifier(gb, features=train_features)
# uniform_label=0 -> require a flat (uniform) selection efficiency for background along uniform_features
loss = KnnFlatnessLossFunction(uniform_features, ada_coefficient=0.1, uniform_label=0)
ugb = uGradientBoostingClassifier(loss=loss, train_variables=train_features, n_estimators=400,
                                  max_depth=4, min_samples_leaf=100, learning_rate=0.2)
# no features= needed here: ugb selects its train_variables itself
classifiers['uGB'] = SklearnClassifier(ugb)
classifiers.fit(trainX, trainY)
pass
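As a quick sanity check before building the full report, the test-set ROC AUC of both models can be computed directly with scikit-learn. This is a sketch, not part of the original notebook; it assumes ClassifiersFactory iterates like an ordered dict of fitted REP classifiers, each of which selects its own input features from the full DataFrame:

from sklearn.metrics import roc_auc_score
for name, clf in classifiers.items():
    # column 1 of predict_proba is the signal probability
    print name, roc_auc_score(testY, clf.predict_proba(testX)[:, 1])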
In [10]:
predictions = classifiers.test_on(testX, testY)
In [11]:
from rep.report import metrics
predictions.learning_curve(metrics={'roc': metrics.RocAuc()}, steps=1)
Out[11]:
In [12]:
predictions.roc()
Out[12]:
In [13]:
from hep_ml.metrics import KnnBasedSDE
predictions.learning_curve(metrics={'sde': KnnBasedSDE(uniform_features=uniform_features, uniform_label=0)})
Out[13]:
In [14]:
predictions.learning_curve(metrics={'sde': KnnBasedSDE(uniform_features=uniform_features, uniform_label=1)})
Out[14]:
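The SDE curves can be complemented by a direct look at the background efficiency profile. The sketch below is not part of the original notebook (it again assumes ClassifiersFactory iterates like an ordered dict): it bins background test events into deciles of KS0_TAU and tabulates the mean predicted signal probability per bin; a roughly constant profile for uGB, in contrast to GB, is the intended effect of the flatness loss.

import numpy, pandas
bck_mask = testY == 0
tau = testX['KS0_TAU'].values[bck_mask]
edges = numpy.percentile(tau, numpy.linspace(0, 100, 11))  # decile edges of KS0_TAU
bin_index = numpy.digitize(tau, edges[1:-1])               # bin numbers 0..9
profiles = {}
for name, clf in classifiers.items():
    proba = clf.predict_proba(testX)[bck_mask, 1]          # P(signal) for background events
    profiles[name] = [proba[bin_index == i].mean() for i in range(10)]
pandas.DataFrame(profiles)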
In [15]:
# ignored_sideband=0.02 drops the extreme tails of the uniform feature when plotting efficiencies
predictions.efficiencies(features=uniform_features, ignored_sideband=0.02)
Out[15]: