import sys
# Put the directory two levels up at the front of the module search path so the
# project-local imports in the following cells can be resolved.
# NOTE(review): presumably that is where `utils`, `folding_group` and
# `decisiontrain` live — confirm against the repository layout.
sys.path.insert(0, "../../")


In [5]:
import pandas
import root_numpy
from folding_group import FoldingGroupClassifier
from decisiontrain import DecisionTrainClassifier
from rep.estimators import SklearnClassifier

Read $B^\pm \to J/\psi K^\pm$ MC samples

# Read the per-track ROOT file into a structured array and wrap it in a DataFrame.
# NOTE(review): the file sits under a `csv/` directory although it is a .root file.
data = pandas.DataFrame(root_numpy.root2array('../../datasets/MC/csv/WG/Bu_JPsiK/2012/Tracks.root'))

from utils import data_tracks_preprocessing
# Project helper; judging by the log it prints, it applies the ghostProb < 0.4
# cut and the "at least one PIDNN* > 0" cut. The raw frame is rebound to the
# preprocessed one, so the unfiltered table is no longer reachable as `data`.
# NOTE(review): it presumably also adds the derived columns (diff_pt, label, ...)
# seen in the column listing — confirm in utils.py.
data = data_tracks_preprocessing(data)

Initial statistics: {'parts': 33632195, 'Events': 1488891}
after  (ghostProb < 0.4)  selection, statistics: {'parts': 32813556, 'Events': 1488885}
after   ( (PIDNNk > 0.0) | (PIDNNm > 0.0) | (PIDNNe > 0.0) | (PIDNNpi > 0.0) | (PIDNNp > 0.0))   selection, statistics: {'parts': 32808324, 'Events': 1488885}

Index([u'run', u'event', u'Bmass', u'time', u'i', u'mult', u'partP', u'partPt',
       u'ptB', u'IPs', u'IP', u'IPerr', u'partlcs', u'EOverP', u'ghostProb',
       u'IPPU', u'nnkrec', u'PIDNNk', u'PIDNNpi', u'PIDNNp', u'PIDNNm',
       u'PIDNNe', u'diff_eta', u'diff_phi', u'phi', u'eta', u'proj', u'ID',
       u'veloch', u'signB', u'signTrack', u'Dist_phi', u'N_sig_sw', u'mu_cut',
       u'e_cut', u'K_cut', u'MCID', u'OS_SS', u'xFlag', u'K_MCID', u'BOosc',
       u'group_column', u'event_id', u'diff_pt', u'cos_diff_phi',
       u'max_PID_mu_k', u'sum_PID_mu_k', u'max_PID_mu_e', u'sum_PID_mu_e',
       u'max_PID_k_e', u'sum_PID_k_e', u'label'],

# Distribution of PIDNNpi for the tracks whose PIDNNm value is negative.
# NOTE(review): `hist` is not imported in any visible cell — presumably it is
# provided by `%pylab inline`; confirm.
hist(data.PIDNNpi[data.PIDNNm < 0].values)

# Same selection (PIDNNm < 0), now showing the ghostProb distribution.
hist(data.ghostProb[data.PIDNNm < 0].values)

# Columns kept for the exported tagging sample: the per-track variables, the
# two sign columns, and `group_column`, which the group-aware split needs.
# The list literal was left unterminated; its last entry is restored from the
# dtype listing of the exported frame, which ends with `group_column`.
features = ['diff_phi', 'partPt', 'partP', 'nnkrec', 'diff_eta', 'EOverP',
            'ptB', 'proj', 'PIDNNe', 'PIDNNk', 'PIDNNm', 'PIDNNpi', 'PIDNNp',
            'phi', 'IP', 'IPerr', 'veloch', 'ghostProb', 'IPPU', 'eta', 'partlcs', u'signB', u'signTrack',
            'group_column']

# `numpy` is not imported by any cell visible in this notebook; import it here
# so the cell also runs on a fresh kernel (Restart & Run All).
import numpy

# Sorted array of the distinct values found in `group_column`.
x = numpy.unique(data.group_column)

from rep.utils import train_test_split_group

# Restrict the frame to the columns listed in `features`.
data_new = data[features]

# Group-aware split: rows sharing a `group_column` value end up on the same
# side, and (per the REP docs) `train_size` then counts groups, not rows.
# `random_state` pins the otherwise random selection so the exported sample is
# reproducible between runs; it is forwarded to the underlying train_test_split.
# NOTE: `data` is rebound to the sampled part; the remainder is discarded.
data, _ = train_test_split_group(data.group_column, data_new,
                                 train_size=500000, random_state=42)

In [18]:

diff_phi        float32
partPt          float32
partP           float32
nnkrec            int32
diff_eta        float32
EOverP          float32
ptB             float32
proj            float32
PIDNNe          float32
PIDNNk          float32
PIDNNm          float32
PIDNNpi         float32
PIDNNp          float32
phi             float32
IP              float32
IPerr           float32
veloch          float32
ghostProb       float32
IPPU            float32
eta             float32
partlcs         float32
signB             int32
signTrack       float32
group_column      int64
dtype: object

# Convert the sampled frame to a record array (dropping the pandas index) and
# write it to `tagging.root` in the current working directory.
# NOTE(review): per the root_numpy docs, mode='recreate' replaces an existing
# file instead of appending to it — confirm this is intended.
root_numpy.array2root(data.to_records(index=False), "tagging.root", mode='recreate')