In [1]:
import numpy
import root_numpy
# import pandas - no pandas today 
from astropy.table import Table
from sklearn.metrics import roc_auc_score
from scipy.special import logit

In [2]:
# Names of the columns used as features, one per line and grouped by topic.
features = [
    # --- the track on its own ---
    'eta',
    'partPt',
    'partP',
    # --- the track relative to the B ---
    'cos_diff_phi',
    'proj',
    'diff_eta',
    'ptB',
    'R_separation',
    'proj_T',
    'proj_T2',
    # --- particle identification ---
    'PIDNNe',
    'PIDNNk',
    'PIDNNm',
    'ghostProb',
    # --- impact parameter ---
    'IP',
    'IPerr',
    'IPs',
    'IPPU',
    # --- everything else ---
    'veloch',
    'partlcs',
    'EOverP',
    # Deliberately left out (judged probably inappropriate):
    #   'phi', 'diff_pt', 'nnkrec',
    #   'max_PID_mu_e', 'max_PID_mu_k', 'sum_PID_k_e',
    #   'sum_PID_mu_e', 'max_PID_k_e', 'sum_PID_mu_k'
]

In [3]:
# Load the Monte Carlo track tree from ROOT and wrap it in an astropy Table.
# To read only part of the tree, pass `stop=30000000` to root2array:
#   data = Table(root_numpy.root2array('../datasets/MC/csv/WG/Bu_JPsiK/2012/Tracks.root', stop=30000000))
data = Table(root_numpy.root2array('../datasets/MC/csv/WG/Bu_JPsiK/2012/Tracks.root'))

# The derived columns below used to be computed in this cell. The statements are
# disabled and kept only as a record of the definitions
# (presumably the columns are already stored in the input file -- TODO confirm):
#
#   data['label'] = (data['signB'] * data['signTrack']) > 0
#   data['cos_diff_phi'] = numpy.cos(data['diff_phi'])
#   data['diff_pt'] = data['ptB'] - data['partPt']
#   data['R_separation'] = numpy.sqrt(data['diff_eta'] ** 2 + (1 - data['cos_diff_phi']) ** 2)
#   # projection in transverse plane
#   data['proj_T'] = data['cos_diff_phi'] * data['partPt']
#   data['proj_T2'] = data['cos_diff_phi'] * data['partPt'] * data['ptB']
#
# A ghost-probability cut is likewise disabled:
#   data = data[data['ghostProb'] < 0.4]

# Keep only rows whose 'IPs' value is finite (this drops NaN and +/-inf entries).
has_finite_ips = numpy.isfinite(data['IPs'])
data = data[has_finite_ips]

In [4]:
def add_event_id(data):
    """Add an ``event_id`` column that labels each distinct (run, event) pair.

    Rows that share both ``data['run']`` and ``data['event']`` get the same
    id. Ids are integers in ``[0, number_of_distinct_pairs)``; which pair gets
    which id is decided by a permutation drawn from a fixed seed (42), so the
    labelling is the same on every call with the same input.

    Parameters
    ----------
    data : table-like (e.g. ``astropy.table.Table`` or dict of arrays)
        Must provide equally long ``'run'`` and ``'event'`` columns holding
        integer-valued numbers; both are cast to 64-bit signed integers.
        Modified in place: the ``'event_id'`` column is added or overwritten.

    Returns
    -------
    None
    """
    # Cast both columns to an explicit int64. `astype(int)` is only 32 bits
    # wide on some platforms, and combining an unsigned 64-bit column with an
    # int64 one makes numpy promote to float64, which cannot represent large
    # keys exactly and would merge distinct events.
    run = numpy.asarray(data['run']).astype(numpy.int64)
    event = numpy.asarray(data['event']).astype(numpy.int64)

    # `max()` of an empty array raises; any positive stride works when there
    # are no rows to encode.
    n_runs = int(run.max()) + 1 if run.size else 1

    # Encode (run, event) as a single integer key: event * stride + run.
    groups = run + event * n_runs
    lookup, groups = numpy.unique(groups, return_inverse=True)

    # Shuffle the dense group indices with a fixed seed so the ids do not
    # follow the sorted order of the keys but remain reproducible.
    permutation = numpy.random.RandomState(42).permutation(len(lookup))
    data['event_id'] = permutation[groups]

In [5]:
# Label every MC row with the id of its (run, event) pair; this adds the
# 'event_id' column to `data` in place.
add_event_id(data)

In [6]:
# Group the MC table by 'event_id'. astropy's group_by returns a new table
# ordered by the key, so rows with the same event_id end up next to each other.
data = data.group_by('event_id')

In [7]:
# Write the grouped MC table to a ROOT file as a plain structured array.
# mode='recreate' replaces any existing file: array2root opens the file in
# 'update' mode by default, so re-running this cell would otherwise extend the
# tree already stored in the file instead of overwriting it.
root_numpy.array2root(numpy.array(data), './Bcharged_MC.root', mode='recreate')

Prepare real data


In [ ]:
# Load the real-data track tree from ROOT and wrap it in an astropy Table.
# NOTE(review): unlike the MC sample, no finite-'IPs' filter is applied here --
# confirm whether that is intentional.
real_data = Table(root_numpy.root2array('../datasets/data/csv/WG/Bu_JPsiK/2012/Tracks.root'))

In [ ]:
# Label every real-data row with the id of its (run, event) pair; this adds the
# 'event_id' column to `real_data` in place.
add_event_id(real_data)

In [ ]:
# Group the real-data table by 'event_id'. astropy's group_by returns a new
# table ordered by the key, so rows with the same event_id end up next to each other.
real_data = real_data.group_by('event_id')

In [ ]:
# Write the grouped real-data table to a ROOT file as a plain structured array.
# mode='recreate' replaces any existing file: array2root opens the file in
# 'update' mode by default, so re-running this cell would otherwise extend the
# tree already stored in the file instead of overwriting it.
root_numpy.array2root(numpy.array(real_data), './Bcharged_data.root', mode='recreate')