In [1]:
# Pulls numpy and matplotlib names into the notebook namespace and renders figures inline.
# Later cells call `numpy` and `hist` without importing them, so they depend on this magic.
%pylab inline
In [4]:
import sys
# Put the directory two levels up at the front of the module search path
# (presumably where folding_group, decisiontrain and utils live -- confirm).
sys.path.insert(0, "../../")
In [5]:
import pandas
import root_numpy
from folding_group import FoldingGroupClassifier
from decisiontrain import DecisionTrainClassifier
from rep.estimators import SklearnClassifier
In [6]:
# Read the Tracks ROOT file with root2array and wrap the result in a DataFrame.
data = pandas.DataFrame(
    root_numpy.root2array('../../datasets/MC/csv/WG/Bu_JPsiK/2012/Tracks.root')
)
In [7]:
# Show the column names of the freshly loaded frame.
data.columns
Out[7]:
In [8]:
from utils import data_tracks_preprocessing
# Rebinds `data` to the preprocessed frame, so the raw frame is no longer reachable.
# Re-running this cell feeds already-preprocessed data back into the helper
# (NOTE(review): confirm data_tracks_preprocessing tolerates that).
data = data_tracks_preprocessing(data)
In [9]:
# Number of rows left after preprocessing.
data.shape[0]
Out[9]:
In [10]:
# Show the column names again, now for the frame returned by data_tracks_preprocessing.
data.columns
Out[10]:
In [11]:
# Histogram of PIDNNpi restricted to rows where PIDNNm is below zero.
hist(data.loc[data['PIDNNm'] < 0, 'PIDNNpi'].values)
Out[11]:
In [12]:
# Histogram of ghostProb restricted to rows where PIDNNm is below zero.
hist(data.loc[data['PIDNNm'] < 0, 'ghostProb'].values)
Out[12]:
In [13]:
# Columns kept for the exported sample (order is preserved in the output file).
features = [
    'diff_phi',
    'partPt',
    'partP',
    'nnkrec',
    'diff_eta',
    'EOverP',
    'ptB',
    'proj',
    'PIDNNe',
    'PIDNNk',
    'PIDNNm',
    'PIDNNpi',
    'PIDNNp',
    'phi',
    'IP',
    'IPerr',
    'veloch',
    'ghostProb',
    'IPPU',
    'eta',
    'partlcs',
    u'signB',
    u'signTrack',
    'group_column',
]
In [14]:
# Sorted distinct values found in the group column.
x = numpy.unique(data['group_column'])
In [15]:
from rep.utils import train_test_split_group
# Keep only the columns named in `features`; `data` itself is left unchanged here.
data_new = data[features]
In [16]:
# Draw the training-size subsample, splitting by group (per the REP docs, rows that
# share a group id are kept together and train_size counts groups, not rows).
# The group ids are read from `data_new` rather than `data`: 'group_column' is one of
# the selected features, so the values are the same, but `data` is rebound below and
# re-running the cell would otherwise pair a shrunken group column with the full frame.
# random_state pins the split so the exported sample is reproducible across runs.
data, _ = train_test_split_group(data_new.group_column, data_new,
                                 train_size=500000, random_state=42)
In [17]:
# Number of rows in the subsample.
data.shape[0]
Out[17]:
In [18]:
# Per-column dtypes of the subsample that the next cell writes out with array2root.
data.dtypes
Out[18]:
In [19]:
# Write the subsample (without the DataFrame index) to tagging.root;
# mode='recreate' starts the output file from scratch instead of appending.
root_numpy.array2root(
    data.to_records(index=False),
    "tagging.root",
    mode='recreate',
)