About

This notebook is devoted to the analysis of the decay $D \to hhh$


In [1]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [2]:
import numpy, pandas
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
import root_numpy

from hep_ml import uboost, reports
from hep_ml import HidingClassifier
from hep_ml.commonutils import train_test_split
from hep_ml.meanadaboost import MeanAdaBoostClassifier
from hep_ml.ugradientboosting import uGradientBoostingClassifier as uGB
#  profile of IPython cluster used to parallelize computations (None = no cluster)
from hep_ml.config import ipc_profile
import hep_ml.ugradientboosting as ugb

In [3]:
numpy.random.seed(42)

Features used


In [4]:
# we want the predictions to be flat in the mass
uniform_variables = ['D_MM']
# the variables that will be used in training
train_variables = ['D_P', 'D_BKGCAT', 'D_FD_OWNPV',
                   'D_TAU', 'D_BPVIPCHI2', 
                   'D_DIRA_OWNPV',
                   'D_DOCA', 'D_DOCA12', 'D_DOCA13', 'D_DOCA23',
                   'D_vchi2', 'D_MINIP', 'D_PT', 'D_MMERR',
                   'p1_PT', 'p2_PT', 'p3_PT'
                   ]

helping_variables = ['D_BKGCAT'] + ['p' + str(i) + '_P' + letter for i in [1,2,3] for letter in "EXYZ"] + \
                    ['p' + str(i) + '_TRUEP_' + letter for i in [1,2,3] for letter in "EXYZ"]

In [5]:
all_branches = uniform_variables + train_variables + helping_variables
all_branches = list(set(all_branches) - set(['D_TAU']))

Loading data from ROOT files


In [6]:
def load_data(name):
    return pandas.DataFrame(root_numpy.root2array('../hep_ml/datasets/D23P/' + name, treename='DecayTree', branches=all_branches))

In [7]:
sig1 = load_data('D2PPP_Signal_Stripped_MD_filtered.root')
sig2 = load_data('D2PPP_Signal_Stripped_MU_filtered.root')
bck1 = load_data('bbbar_bkg_ppp_wide_filtered.root').query('D_BKGCAT > 20')
bck2 = load_data('ccbar_bkg_ppp_wide_filtered.root').query('D_BKGCAT > 20')

concatenating data into one dataframe:


In [8]:
data = pandas.concat([bck1, bck2, sig1, sig2], ignore_index=True)
labels = numpy.concatenate([numpy.zeros(len(bck1) + len(bck2)), numpy.ones(len(sig1) + len(sig2))]).astype(int)

In [9]:
hist(data.D_MM[labels == 1].values, bins=40, label='signal')
hist(data.D_MM[labels == 0].values, bins=40, label='bg')
legend()


Out[9]:
<matplotlib.legend.Legend at 0x52166d0>

Adding flight time, invariant masses, and the minimal distance to the corners of the Dalitz plot

$\qquad \tau = t \sqrt{1 - \beta^2} = \dfrac{FD \sqrt{1 - \beta^2}}{ c \beta } = \dfrac{FD \times m}{c \times p}$
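
The last equality uses the relativistic relations (with mass and momentum in the same units):

$\qquad \beta = \dfrac{p}{E}, \qquad \sqrt{1 - \beta^2} = \dfrac{m}{E} \quad\Rightarrow\quad \dfrac{\sqrt{1-\beta^2}}{\beta} = \dfrac{m}{p}$

Assuming the usual units (FD in mm, mass and momentum in MeV), dividing by $c \approx 300$ mm/ns gives $\tau$ in ns, which is what the next cell computes.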


In [10]:
# 300 is the speed of light in mm/ns (assuming distances in mm and masses/momenta in MeV, so D_TAU is in ns)
data['D_TAU'] = data.eval('D_FD_OWNPV * D_MM / D_P / 300')

def compute_square_inv_mass(df, name1, name2):
    pe = df[name1 + 'E'] + df[name2 + 'E']
    px = df[name1 + 'X'] + df[name2 + 'X']
    py = df[name1 + 'Y'] + df[name2 + 'Y']
    pz = df[name1 + 'Z'] + df[name2 + 'Z']
    return pe*pe - px*px - py*py - pz*pz

# data['m12'] = compute_square_inv_mass(data, 'p1_P', 'p2_P')
# data['m13'] = compute_square_inv_mass(data, 'p1_P', 'p3_P')
# data['m23'] = compute_square_inv_mass(data, 'p2_P', 'p3_P')

# using true information for computing uniformity / plots
data['m12'] = compute_square_inv_mass(data, 'p1_TRUEP_', 'p2_TRUEP_')
data['m13'] = compute_square_inv_mass(data, 'p1_TRUEP_', 'p3_TRUEP_')
data['m23'] = compute_square_inv_mass(data, 'p2_TRUEP_', 'p3_TRUEP_')


# distance of each squared invariant mass (converted from MeV^2 to GeV^2) from the
# kinematic limit (m_D - m_pi)^2 reached at a corner of the Dalitz plot
d1 = (1.969-0.1396)**2 - data.m23 * 1e-6
d2 = (1.969-0.1396)**2 - data.m13 * 1e-6
d3 = (1.969-0.1396)**2 - data.m12 * 1e-6

data['min_dist'] = numpy.minimum(numpy.minimum(d1, d2), d3)
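
As a quick sanity check of the units (a hypothetical extra cell, assuming distances in mm and masses/momenta in MeV, so that D_TAU is in ns):

In [ ]:
# optional check: with the assumed units the signal lifetime distribution should be
# roughly exponential with a mean of order 1e-3 ns (i.e. around a picosecond)
hist(data.D_TAU[labels == 1].values, bins=60, alpha=0.7, label='signal')
hist(data.D_TAU[labels == 0].values, bins=60, alpha=0.7, label='bg')
legend(), xlabel('D_TAU [ns]')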

Deleting unphysical events


In [11]:
mask = numpy.array(data.min_dist > 0)
data = data.ix[mask, :]
labels = labels[mask]

Description of classifiers

introduce a function that prepares a set of classifiers


In [12]:
def prepare_classifiers(train_variables, uniform_variables, n_estimators=200, max_depth=4, min_samples_leaf=30, uniform_label=0):
    """
    This function prepares the classifiers with proper parameters;
    the decision tree parameters are the same for all classifiers.
    
    uniform_variables: list of variables along which uniformity of predictions is desired

    train_variables: list of variables used by classifiers 
                    (should not include uniform variables)
    
    uniform_label: 1, if we want flatness in signal predictions
                   0, if we want flatness in background
    
    """
    # parameters for gradient boosting
    ugb_params = {'max_depth': max_depth, 
                  'min_samples_leaf': min_samples_leaf,
                  'train_variables': train_variables, 
                  'subsample': 0.5, 
                  'n_estimators': n_estimators}
    # parameters for other classifiers
    common_params = {'uniform_variables': uniform_variables, 
                     'train_variables': train_variables, 
                     'n_estimators': n_estimators}

    base_tree = DecisionTreeClassifier(max_depth=max_depth, min_samples_leaf=min_samples_leaf)
    
    classifiers = reports.ClassifiersDict()
    
    base_ada = AdaBoostClassifier(base_estimator=base_tree, n_estimators=n_estimators, learning_rate=0.2)
    classifiers['ada']    = HidingClassifier(train_variables=train_variables, base_estimator=base_ada)

    classifiers['knnAda'] = MeanAdaBoostClassifier(base_estimator=base_tree, learning_rate=0.1, uniform_label=[0,1], **common_params)
    
    classifiers['uGB+nn'] = uGB(loss=ugb.SimpleKnnLossFunction(uniform_variables, knn=10, uniform_label=[0,1]), 
                                learning_rate=0.2, **ugb_params)

    binflatnessloss = ugb.BinFlatnessLossFunction(uniform_variables, uniform_label=uniform_label, ada_coefficient=0.03)
    classifiers['uGB+binFL'] = uGB(loss=binflatnessloss, learning_rate=0.1, **ugb_params)
    
    knnflatnessloss = ugb.KnnFlatnessLossFunction(uniform_variables, n_neighbours=300, 
                                                  uniform_label=uniform_label, ada_coefficient=0.03)
    classifiers['uGB+knnFL'] = uGB(loss=knnflatnessloss, learning_rate=0.1, **ugb_params)
    
#     classifiers['uBDT']   = uboost.uBoostBDT(base_estimator=base_tree, uniform_label=uniform_label, **common_params)

    classifiers['uBoost'] = uboost.uBoostClassifier(base_estimator=base_tree, uniform_label=uniform_label, 
                                                    efficiency_steps=7, **common_params)
    return classifiers

Uniformity in background

we train on the left sideband of the background (D_MM < 1850), then look at the background efficiency

uniform_label=0 means that we want uniformity in background

Splitting data


In [13]:
# test mode
# mask = numpy.random.random(len(data)) > 0.9
# data = data.ix[mask, :]
# labels = labels[mask]

In [14]:
trainX, testX, trainY, testY = train_test_split(data, labels, train_size=0.5)

In [15]:
mask = (trainY == 1) | (trainX.D_MM < 1850)
trainX_bck = trainX.ix[mask, :]
trainY_bck = trainY[numpy.array(mask)]

In [16]:
hist(numpy.array(testX.D_MM[testY == 0]), label='test bck', bins=30)
hist(numpy.array(trainX_bck.D_MM[trainY_bck == 0]),  label='train bck')
legend()


Out[16]:
<matplotlib.legend.Legend at 0x6840510>

Training classifiers


In [17]:
classifiers = prepare_classifiers(train_variables, uniform_variables, uniform_label=0)
classifiers.fit(trainX_bck, trainY_bck, ipc_profile=ipc_profile)
preds = classifiers.test_on(testX, testY)


Classifier          ada is learnt in 65.63 seconds
Classifier       knnAda is learnt in 60.16 seconds
Classifier       uGB+nn is learnt in 35.73 seconds
Classifier    uGB+binFL is learnt in 16.58 seconds
Classifier    uGB+knnFL is learnt in 65.04 seconds
Classifier       uBoost is learnt in 291.93 seconds
Totally spent 328.68 seconds on parallel training

Dynamics (dependence on the number of trees trained)


In [18]:
figure(figsize=(18, 7))
subplot(131), title('Learning curves'), preds.learning_curves()
subplot(132), title('SDE curves:sig') , preds.sde_curves(uniform_variables=uniform_variables, step=3, label=1)
subplot(133), title('SDE curves:bg')  , preds.sde_curves(uniform_variables=uniform_variables, step=3, label=0)
show()


Efficiency on signal


In [19]:
preds.efficiency(uniform_variables=uniform_variables, label=1)


Out[19]:
<hep_ml.reports.Predictions at 0x6848f90>

In [20]:
preds.prediction_pdf(bins=50)



In [21]:
pr = preds.predictions['uGB+binFL'][ testY == 1 , 1]
hist(pr, bins=100)
numpy.unique(pr), len(numpy.unique(pr))


Out[21]:
(array([ 0.4491001 ,  0.44957179,  0.45186503, ...,  0.60428459,
         0.60459809,  0.61548906]), 3459)

In [22]:
preds.roc()


Out[22]:
<hep_ml.reports.Predictions at 0x6848f90>

Efficiency on background

NB: the background efficiency here is the fraction of correctly classified background events.

Note the slope in the AdaBoost curve!
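
To make this concrete, here is a minimal sketch of what the per-bin background efficiency means (an illustration only, not the hep_ml internals; it reuses the preds.predictions layout shown above, with 'ada' as an example classifier):

In [ ]:
# illustration: fraction of background events rejected by a fixed cut, in bins of D_MM;
# for a classifier flat in background this fraction is the same in every bin
bck_probs = preds.predictions['ada'][testY == 0, 1]   # predicted P(signal) for true background
bck_mass = numpy.array(testX.D_MM[testY == 0])
threshold = numpy.percentile(bck_probs, 70)           # cut rejecting ~70% of background overall
edges = numpy.linspace(bck_mass.min(), bck_mass.max(), 21)
centers = 0.5 * (edges[1:] + edges[:-1])
effs = [numpy.mean(bck_probs[(bck_mass >= lo) & (bck_mass < hi)] < threshold)
        for lo, hi in zip(edges[:-1], edges[1:])]
plot(centers, effs, 'o-'), xlabel('D_MM'), ylabel('background efficiency')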


In [23]:
preds.efficiency(uniform_variables=uniform_variables, label=0)


Out[23]:
<hep_ml.reports.Predictions at 0x6848f90>

ROC curves


In [24]:
preds.roc()


Out[24]:
<hep_ml.reports.Predictions at 0x6848f90>

In [25]:
# preds.root_roc().SaveAs('datasets/D23P/plots/roc_flat_in_bck.root')
# preds.root_roc()

Correlation of predictions on background


In [26]:
preds.correlation_curves(var_name=uniform_variables[0], label=0)


Out[26]:
<hep_ml.reports.Predictions at 0x6848f90>

Different measures of uniformity

compared on the background
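
Before comparing them on the plots, a rough sketch of the first measure may help: SDE (the standard deviation of efficiencies) measures how much the per-group efficiency deviates from the global one at a fixed cut. The cell below is only an illustration under simplifying assumptions (equal-population bins, a single global efficiency), not the hep_ml implementation:

In [ ]:
# hypothetical sketch of SDE: weighted standard deviation of per-bin efficiencies
# around the chosen global efficiency
def sde_sketch(probs, uniform_var, global_eff=0.5, n_bins=20):
    threshold = numpy.percentile(probs, 100 * (1 - global_eff))
    edges = numpy.percentile(uniform_var, numpy.linspace(0, 100, n_bins + 1))
    bin_effs, weights = [], []
    for lo, hi in zip(edges[:-1], edges[1:]):
        in_bin = (uniform_var >= lo) & (uniform_var < hi)
        if in_bin.sum() > 0:
            bin_effs.append(numpy.mean(probs[in_bin] > threshold))
            weights.append(in_bin.sum())
    bin_effs = numpy.array(bin_effs)
    weights = numpy.array(weights, dtype=float) / sum(weights)
    return numpy.sqrt(numpy.sum(weights * (bin_effs - global_eff) ** 2))

bck = testY == 0
sde_sketch(preds.predictions['ada'][bck, 1], numpy.array(testX.D_MM[bck]))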


In [27]:
figure(figsize=(20, 7))
subplot(141), preds.sde_curves(uniform_variables=uniform_variables, step=5, label=0),   title('SDE')
subplot(142), preds.cvm_curves(uniform_variables=uniform_variables, step=5, label=0),   title('Cramer-von Mises')
subplot(143), preds.theil_curves(uniform_variables=uniform_variables, step=5, label=0), title('Theil')
subplot(144), preds.ks_curves(uniform_variables=uniform_variables, step=5, label=0),    title('KS')
show()


Uniformity in signal

Now we are trying to get uniform predictions in signal,

which is much harder, because the signal is seriously concentrated. This time we do not throw away the high-mass region.

Pay attention: uniform_label=1 means we want to be flat in signal


In [28]:
classifiers2 = prepare_classifiers(train_variables, uniform_variables, uniform_label=1)
classifiers2.fit(trainX, trainY, ipc_profile=ipc_profile)
sig_preds = classifiers2.test_on(testX, testY)


Classifier          ada is learnt in 79.20 seconds
Classifier       knnAda is learnt in 72.09 seconds
Classifier       uGB+nn is learnt in 39.57 seconds
Classifier    uGB+binFL is learnt in 47.50 seconds
Classifier    uGB+knnFL is learnt in 102.75 seconds
Classifier       uBoost is learnt in 384.96 seconds
Totally spent 433.06 seconds on parallel training

Training dynamics


In [29]:
figure(figsize=(18, 7))
subplot(131), title('Learning curves'), sig_preds.learning_curves()
subplot(132), title('SDE curves:sig') , sig_preds.sde_curves(uniform_variables=uniform_variables, step=3, label=1)
subplot(133), title('SDE curves:bg')  , sig_preds.sde_curves(uniform_variables=uniform_variables, step=3, label=0)
show()


Efficiency on signal and background


In [30]:
sig_preds.efficiency(uniform_variables=uniform_variables, label=1)
sig_preds.efficiency(uniform_variables=uniform_variables, label=0)


Out[30]:
<hep_ml.reports.Predictions at 0xeca3610>

ROC curves


In [31]:
# sig_preds.root_roc().SaveAs('datasets/D23P/plots/roc_flat_in_sig_Dmass.root')
sig_preds.roc()


Out[31]:
<hep_ml.reports.Predictions at 0xeca3610>

Dalitz variables


In [32]:
def plotDistribution2D(var_name1, var_name2, data_frame, bins=30):
    pylab.hist2d(numpy.array(data_frame[var_name1]), numpy.array(data_frame[var_name2]), bins=bins)
    pylab.xlabel(var_name1), pylab.ylabel(var_name2)
    pylab.colorbar()

pylab.figure(figsize=(16, 6))
subplot(1, 2, 1), pylab.title("signal"),       plotDistribution2D("m12", "m13", data[labels == 1])
subplot(1, 2, 2), pylab.title("background"),   plotDistribution2D("m12", "m13", data[labels == 0])
show()


Aiming at uniformity in signal in Dalitz variables


In [33]:
dalitz_vars = ['m12', 'm13']

In [34]:
dalitz_classifiers = prepare_classifiers(train_variables, uniform_variables=dalitz_vars, uniform_label=1)
dalitz_classifiers.fit(trainX, trainY, ipc_profile=ipc_profile)
dalitz_predictions = dalitz_classifiers.test_on(testX, testY)


Classifier          ada is learnt in 79.66 seconds
Classifier       knnAda is learnt in 62.32 seconds
Classifier       uGB+nn is learnt in 42.29 seconds
Classifier    uGB+binFL is learnt in 44.10 seconds
Classifier    uGB+knnFL is learnt in 103.70 seconds
Classifier       uBoost is learnt in 368.57 seconds
Totally spent 413.19 seconds on parallel training

In [35]:
# dalitz_predictions.root_roc().SaveAs('datasets/D23P/plots/roc_flat_in_sig_Dalitz.root')
dalitz_predictions.roc()


Out[35]:
<hep_ml.reports.Predictions at 0xfabc6d0>

Efficiencies


In [36]:
dalitz_predictions.efficiency(uniform_variables=dalitz_vars, n_bins=15, label=1)


Stage result, efficiency=0.60
Stage result, efficiency=0.70
Stage result, efficiency=0.80
Stage result, efficiency=0.90
Out[36]:
<hep_ml.reports.Predictions at 0xfabc6d0>

In [37]:
dalitz_predictions.prediction_pdf()


SDE


In [38]:
figure(figsize=[15, 7])
dalitz_predictions.sde_curves(dalitz_vars, step=10)
figure(figsize=[15, 7])
dalitz_predictions.learning_curves(step=10)


Out[38]:
<hep_ml.reports.Predictions at 0xfabc6d0>

Minimal distance from corners


In [39]:
dalitz_predictions.hist(['min_dist'])


Out[39]:
<hep_ml.reports.Predictions at 0xfabc6d0>

In [40]:
dalitz_predictions.efficiency(['min_dist'], label=1)


Out[40]:
<hep_ml.reports.Predictions at 0xfabc6d0>

In [41]:
figure(figsize=[13, 7])
dalitz_predictions.sde_curves(['min_dist'], label=1)


Playing with ada_coefficient parameter

Let's recall that we use the following loss function

$\text{loss} = \text{FlatnessLoss} + \alpha \; \text{AdaLoss} $

Let's show how the result depends on $\alpha$ (the parameter named ada_coefficient); we will try to get uniformity in signal
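
For intuition, the cell below sketches how such a composite loss could be evaluated with a bin-based flatness term that compares the in-bin and global cumulative distributions of signal scores. This is a hypothetical illustration only; the actual losses and their gradients are implemented in hep_ml.ugradientboosting:

In [ ]:
# schematic structure of loss = FlatnessLoss + alpha * AdaLoss (illustration only)
def flatness_plus_ada_loss(scores, y, groups, alpha):
    """scores: real-valued classifier outputs, y: labels (0/1), both numpy arrays;
    groups: bin index of each event in the uniform variables."""
    # AdaLoss term: the usual exponential loss
    ada = numpy.sum(numpy.exp(-(2 * y - 1) * scores))
    # FlatnessLoss term: squared difference between in-bin and global CDFs of signal scores
    sig = y == 1
    s_sig, g_sig = scores[sig], groups[sig]
    sorted_sig = numpy.sort(s_sig)
    F_global = numpy.searchsorted(sorted_sig, s_sig, side='right') / float(len(s_sig))
    flatness = 0.
    for b in numpy.unique(g_sig):
        in_b = g_sig == b
        F_bin = numpy.searchsorted(numpy.sort(s_sig[in_b]), s_sig[in_b], side='right') / float(in_b.sum())
        flatness += in_b.mean() * numpy.mean((F_bin - F_global[in_b]) ** 2)
    return flatness + alpha * ada

With a small alpha the flatness term dominates and uniformity is favoured over raw classification quality; a large alpha recovers plain AdaBoost-like behaviour.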


In [42]:
fl_clf = reports.ClassifiersDict()
fl_clf['AdaBoost'] = HidingClassifier(train_variables, 
    AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=4, min_samples_leaf=30,), n_estimators=200, learning_rate=0.2))
for alpha in [0.01, 0.02, 0.05, 0.1, 0.2]:
    flatnessloss = ugb.KnnFlatnessLossFunction(dalitz_vars, uniform_label=1, ada_coefficient=alpha)
    fl_clf['uGB+FL, alpha=' + str(alpha)] = uGB(loss=flatnessloss, train_variables=train_variables, subsample=0.5, 
                                                learning_rate=0.1, n_estimators=200, max_depth=4, min_samples_leaf=30)

fl_clf.fit(trainX, trainY, ipc_profile=ipc_profile)
fl_pred = fl_clf.test_on(testX, testY)


Classifier     AdaBoost is learnt in 77.96 seconds
Classifier uGB+FL, alpha=0.01 is learnt in 80.35 seconds
Classifier uGB+FL, alpha=0.02 is learnt in 88.00 seconds
Classifier uGB+FL, alpha=0.05 is learnt in 79.85 seconds
Classifier uGB+FL, alpha=0.1 is learnt in 81.92 seconds
Classifier uGB+FL, alpha=0.2 is learnt in 80.23 seconds
Totally spent 160.43 seconds on parallel training

As can be seen from the plots below,

this parameter controls the tradeoff between uniformity and prediction quality


In [43]:
mpl.rcParams['lines.linewidth'] = 2.

In [44]:
fl_pred.learning_curves(), ylim(0.85, 1.)
legend().set_visible(False)
show()

sde_data = pandas.DataFrame(fl_pred.sde_curves(dalitz_vars, label=1, return_data=True))
legend().set_visible(False)
# sde_data.to_csv('datasets/D23P/plots/sde_for_differend_alpha.csv')



In [45]:
# fl_pred.root_roc().SaveAs('datasets/D23P/plots/roc_for_different_alpha.root')
fl_pred.roc()


Out[45]:
<hep_ml.reports.Predictions at 0xe6b16d0>

Saving predictions


In [46]:
# stop execution here (a bare break outside a loop deliberately raises a SyntaxError)
break


  File "<ipython-input-46-3106a075f401>", line 2
    break
SyntaxError: 'break' outside loop

In [ ]:
def save_predictions(X, y, classifiers, filename):
    X2 = X.copy()
    X2['is_signal'] = y
    for name, clf in classifiers.iteritems():
        X2['pred_' + name] = clf.predict_proba(X)[:, 1]
    root_numpy.array2root(X2.to_records(), mode='recreate', filename=filename)

In [ ]:
save_predictions(testX, testY, dalitz_classifiers, 'datasets/D23P/dalitz_test_prediction.root')

In [ ]:
save_predictions(testX, testY, classifiers, 'datasets/D23P/flat_in_DM_on_bck_prediction.root')

In [ ]:
save_predictions(testX, testY, fl_clf, 'datasets/D23P/different_alpha_prediction.root')

Appendix. Plots for presentation


In [ ]:
n_estimators=200
max_depth=4
min_samples_leaf=30
uniform_label=1

# parameters for gradient boosting
ugb_params = {'max_depth': max_depth, 
              'min_samples_leaf': min_samples_leaf,
              'train_variables': train_variables, 
              'subsample': 0.5, 
              'n_estimators': n_estimators}
# parameters for other classifiers
common_params = {'uniform_variables': uniform_variables, 
                 'train_variables': train_variables, 
                 'n_estimators': n_estimators}

base_tree = DecisionTreeClassifier(max_depth=max_depth, min_samples_leaf=min_samples_leaf)

classifiers = reports.ClassifiersDict()

base_ada = AdaBoostClassifier(base_estimator=base_tree, n_estimators=n_estimators, learning_rate=0.2)
classifiers['ada']    = HidingClassifier(train_variables=train_variables, base_estimator=base_ada)

classifiers['knnAda'] = MeanAdaBoostClassifier(base_estimator=base_tree, learning_rate=0.1, uniform_label=[0,1], **common_params)

classifiers['uGB+nn'] = uGB(loss=ugb.SimpleKnnLossFunction(uniform_variables, knn=10, uniform_label=[0,1]), 
                            learning_rate=0.2, **ugb_params)

binflatnessloss = ugb.BinFlatnessLossFunction(uniform_variables, uniform_label=uniform_label, ada_coefficient=0.03)
classifiers['uGB+binFL'] = uGB(loss=binflatnessloss, learning_rate=0.1, **ugb_params)

knnflatnessloss = ugb.KnnFlatnessLossFunction(uniform_variables, n_neighbours=300, 
                                              uniform_label=uniform_label, ada_coefficient=0.03)
classifiers['uGB+knnFL'] = uGB(loss=knnflatnessloss, learning_rate=0.1, **ugb_params)

#     classifiers['uBDT']   = uboost.uBoostBDT(base_estimator=base_tree, uniform_label=uniform_label, **common_params)

classifiers['uBoost'] = uboost.uBoostClassifier(base_estimator=base_tree, uniform_label=uniform_label, 
                                                efficiency_steps=7, **common_params)

In [ ]:
classifiers_plot = prepare_classifiers(train_variables, uniform_variables, uniform_label=1)
# classifiers_plot.pop('uBoost')
classifiers_plot.fit(trainX, trainY, ipc_profile=ipc_profile)
sig_preds = classifiers_plot.test_on(testX, testY)

In [ ]:
figure(figsize=(18, 7))
subplot(131), title('Learning curves'), sig_preds.learning_curves()
subplot(132), title('SDE curves:sig') , sig_preds.sde_curves(uniform_variables=uniform_variables, step=3, label=1)
subplot(133), title('SDE curves:bg')  , sig_preds.sde_curves(uniform_variables=uniform_variables, step=3, label=0)
show()

In [ ]:
sig_preds.efficiency(uniform_variables=uniform_variables, label=1)
sig_preds.efficiency(uniform_variables=uniform_variables, label=0)

In [ ]:
hist(trainX.D_MM.values[trainY == 0], normed=True)
hist(trainX.D_MM.values[trainY == 1], normed=True)
pass

Appendix 2. SDE for uniformity in Dalitz variables (for paper)


In [47]:
import os, sys
import ROOT
from ROOT import TGraph, TCanvas, TLegend
from array import array


ROOT.gROOT.LoadMacro('../assets/paperdraft/graphs/lhcbStyle.h')
ROOT.lhcbStyle()

In [48]:
from rep_tmva import rootnotes
marker = 20
size = 1
colour = 1

In [49]:
dalitz_vars = ['m12', 'm13']

sde_dalitz_data = dalitz_predictions.sde_curves(dalitz_vars, step=10, return_data=True)



In [ ]:
from collections import OrderedDict
sde_dalitz_renamed = OrderedDict()
sde_dalitz_renamed['AdaBoost'] = sde_dalitz_data['ada']
sde_dalitz_renamed['kNNAda'] = sde_dalitz_data['knnAda']
sde_dalitz_renamed['uGBkNN'] = sde_dalitz_data['uGB+nn']
sde_dalitz_renamed['uGBFL(bin)'] = sde_dalitz_data['uGB+binFL']
sde_dalitz_renamed['uGBFL(knn)'] = sde_dalitz_data['uGB+knnFL']
sde_dalitz_renamed['uBoost'] = sde_dalitz_data['uBoost']

In [ ]:
keeper = []

In [ ]:
def plot_sde_root(df):
    canvas = rootnotes.canvas("canv%i" % len(keeper), (1500, 800))
    canvas.Divide(1)
    canvas.cd(1)
    
    legend = TLegend(0.175, 0.17, 0.525, 0.45)
    legend.SetFillStyle(1001)
    legend.SetFillColor(ROOT.kWhite)
    legend.SetMargin(0.35)
    legend.SetTextSize(0.04)
    keeper.append(legend)
    for i, (color_id, (name, graph)) in enumerate(zip([0, 6, 2, 1, 1, 3], df.iteritems())):
        plot = TGraph(len(graph), array('f', graph.keys()), array('f', graph.values))
        plot.SetLineColor(colour + color_id)
        plot.SetMarkerColor(colour + color_id)
        plot.SetMarkerSize(2)
        plot.SetMarkerStyle(marker + i)
        plot.GetXaxis().SetTitle('n trees')
        plot.GetXaxis().SetNdivisions(510)
        plot.GetYaxis().SetTitle('SDE')
        plot.GetXaxis().SetTitleSize(0.1)
        plot.GetXaxis().SetTitleOffset(0.65)
        plot.GetXaxis().SetLabelSize(0.075)
        plot.GetXaxis().SetLabelOffset(0.015)
        plot.GetYaxis().SetTitleSize(0.1)
        plot.GetYaxis().SetTitleOffset(0.65)
        plot.GetYaxis().SetLabelSize(0.075)
        plot.GetYaxis().SetRangeUser(0.0, 0.11)
        plot.GetXaxis().SetRangeUser(0.0, 200.0)

        plot.SetTitle(name)
        legend.AddEntry(plot, name ,'pl')

        if i == 0:
            plot.Draw('APL')
        else:
            plot.Draw('PL')
        
        keeper.append(plot)
    legend.Draw()
    
    keeper.append(canvas)
    return canvas

In [ ]:
canvas = plot_sde_root(sde_dalitz_renamed)

In [ ]:
canvas

In [ ]:
# canvas.SaveAs('../assets/paperdraft/graphs/sde_for_dalitz_staged.png')
# canvas.SaveAs('../assets/paperdraft/graphs/sde_for_dalitz_staged.pdf')
# canvas.SaveAs('../assets/paperdraft/graphs/sde_for_dalitz_staged.root')

In [53]:
pandas.DataFrame(sde_dalitz_data).ix[199, :]


Out[53]:
ada          0.078927
knnAda       0.071323
uGB+nn       0.089380
uGB+binFL    0.056233
uGB+knnFL    0.057149
uBoost       0.074280
Name: 199, dtype: float64

In [ ]: