In [1]:
    
# %pylab star-imports numpy and matplotlib.pyplot into the notebook namespace and
# turns on inline figures. Later cells rely on it for the bare `hist` and `numpy` names,
# which are never imported explicitly.
%pylab inline
    
    
In [4]:
    
# Put the directory two levels up at the front of the module search path so that
# modules living there can be imported by the cells below.
# NOTE(review): presumably this is where folding_group, decisiontrain and utils live - confirm.
import sys

sys.path[:0] = ["../../"]
    
In [5]:
    
import pandas
import root_numpy
from folding_group import FoldingGroupClassifier
from decisiontrain import DecisionTrainClassifier
from rep.estimators import SklearnClassifier
    
    
In [6]:
    
# Read the ROOT file with root_numpy and wrap the resulting array in a DataFrame.
tracks_file = '../../datasets/MC/csv/WG/Bu_JPsiK/2012/Tracks.root'
data = pandas.DataFrame(root_numpy.root2array(tracks_file))
    
In [7]:
    
# List the column names of the frame as loaded, before any preprocessing is applied.
data.columns
    
    Out[7]:
In [8]:
    
# Run the project's track preprocessing and rebind `data` to its result.
# NOTE(review): because the input name is overwritten, re-running this cell feeds
# already-preprocessed rows back into data_tracks_preprocessing; whether that is
# harmless depends on the helper, which is not shown here - verify before re-running.
from utils import data_tracks_preprocessing
data = data_tracks_preprocessing(data)
    
    
    
In [9]:
    
# Number of rows left after data_tracks_preprocessing.
len(data)
    
    Out[9]:
In [10]:
    
# Column names after preprocessing, for comparison with the listing taken right after loading.
data.columns
    
    Out[10]:
In [11]:
    
# Histogram of PIDNNpi restricted to the rows whose PIDNNm value is below zero.
pidnnm_below_zero = data.PIDNNm < 0
hist(data.PIDNNpi[pidnnm_below_zero].values)
    
    Out[11]:
    
In [12]:
    
# Histogram of ghostProb restricted to the rows whose PIDNNm value is below zero.
pidnnm_below_zero = data.PIDNNm < 0
hist(data.ghostProb[pidnnm_below_zero].values)
    
    Out[12]:
    
In [13]:
    
# Columns carried forward into the exported sample. The last entry, 'group_column',
# is the key used further down for the group-wise split. Order is preserved as-is.
features = [
    'diff_phi', 'partPt', 'partP', 'nnkrec',
    'diff_eta', 'EOverP', 'ptB', 'proj',
    'PIDNNe', 'PIDNNk', 'PIDNNm', 'PIDNNpi', 'PIDNNp',
    'phi', 'IP', 'IPerr', 'veloch', 'ghostProb',
    'IPPU', 'eta', 'partlcs',
    u'signB', u'signTrack',
    'group_column',
]
    
In [14]:
    
# Collect the distinct values found in group_column.
# NOTE(review): `x` is not read by any other cell visible here - confirm it is still needed.
x = numpy.unique(data['group_column'])
    
In [15]:
    
# Bring in REP's group-aware splitter (used in the next cell) and narrow the
# preprocessed frame down to the columns named in `features`, which include
# 'group_column' itself.
from rep.utils import train_test_split_group
data_new = data[features]
    
In [16]:
    
# Draw a group-aware random subsample of data_new and keep the "train" part as `data`.
#
# The group labels are read from data_new (the frame being split) instead of from `data`.
# This cell rebinds `data`, so on a re-run the old form would pair labels from the
# already-reduced frame with the full data_new and the lengths would no longer match.
# On a first run both frames carry the same group_column, so the result is unchanged.
#
# random_state pins the otherwise random selection so the exported sample is reproducible.
# NOTE(review): this assumes REP forwards extra keyword arguments to scikit-learn's
# train_test_split - confirm against the installed REP version.
#
# NOTE(review): per REP's documentation, when a group column is given train_size counts
# groups rather than rows - confirm that 500000 is meant as a number of groups.
data, _ = train_test_split_group(data_new.group_column, data_new, train_size=500000, random_state=42)
    
In [17]:
    
# Row count of the part kept from train_test_split_group.
# NOTE(review): this can differ from train_size if that argument counts groups rather than rows - confirm.
len(data)
    
    Out[17]:
In [18]:
    
# Inspect the per-column dtypes before the frame is converted to records and written to a ROOT file.
data.dtypes
    
    Out[18]:
In [19]:
    
# Convert the subsample to a record array (dropping the DataFrame index) and write it
# to a ROOT file; mode='recreate' is passed so the file is written anew on each run.
output_file = "tagging.root"
root_numpy.array2root(
    data.to_records(index=False),
    output_file,
    mode='recreate',
)