In [1]:
import numpy
import root_numpy
# import pandas - no pandas today
from astropy.table import Table
from sklearn.metrics import roc_auc_score
from scipy.special import logit
In [2]:
# Names of the track-level input variables, built by concatenating thematic groups.
features = (
    # track itself
    ['eta', 'partPt', 'partP']
    # track and B
    + ['cos_diff_phi', 'proj', 'diff_eta', 'ptB', 'R_separation', 'proj_T', 'proj_T2']
    # PID
    + ['PIDNNe', 'PIDNNk', 'PIDNNm', 'ghostProb']
    # IP
    + ['IP', 'IPerr', 'IPs', 'IPPU']
    # Other
    + ['veloch', 'partlcs', 'EOverP']
)
# Left out as probably inappropriate:
#   'phi',
#   'diff_pt', 'nnkrec',
#   'max_PID_mu_e', 'max_PID_mu_k', 'sum_PID_k_e', 'sum_PID_mu_e', 'max_PID_k_e', 'sum_PID_mu_k',
In [3]:
# Load the Monte Carlo track ntuple into an astropy Table.
data = Table(root_numpy.root2array('../datasets/MC/csv/WG/Bu_JPsiK/2012/Tracks.root'))
# Disabled alternative: the same read, limited with stop=30000000.
# data = Table(root_numpy.root2array('../datasets/MC/csv/WG/Bu_JPsiK/2012/Tracks.root', stop=30000000))
# Disabled derived-column definitions and ghost-probability cut, kept for reference.
# NOTE(review): 'cos_diff_phi', 'R_separation', 'proj_T' and 'proj_T2' are listed in `features`;
# presumably they are already stored in the input file - confirm.
# data['label'] = (data['signB'] * data['signTrack']) > 0
# data['cos_diff_phi'] = numpy.cos(data['diff_phi'])
# data['diff_pt'] = data['ptB'] - data['partPt']
# data['R_separation'] = numpy.sqrt(data['diff_eta'] ** 2 + (1 - data['cos_diff_phi']) ** 2)
# # projection in transverse plane
# data['proj_T'] = data['cos_diff_phi'] * data['partPt']
# data['proj_T2'] = data['cos_diff_phi'] * data['partPt'] * data['ptB']
# data = data[data['ghostProb'] < 0.4]
# Keep only the rows whose 'IPs' value is finite (drops NaN and +/-inf entries).
data = data[numpy.isfinite(data['IPs'])]
In [4]:
def add_event_id(data):
    """
    Attach an integer ``event_id`` column identifying each (run, event) pair.

    Rows that share the same ``run`` and ``event`` values receive the same id.
    The ids are the integers ``0 .. n_unique - 1`` handed out through a
    fixed-seed shuffle, so they are reproducible between executions but carry
    no ordering information about the run or event numbers.

    :param data: table-like object supporting ``data[name]`` column access and
        ``data[name] = array`` assignment (an astropy Table in this notebook).
        It is modified in place: ``event_id`` is added or overwritten.
    :return: None
    """
    # Do the key arithmetic in explicit 64-bit integers: ``astype(int)`` is only
    # 32 bits wide on some platforms, and ``run.max() + 1`` evaluated in a narrow
    # run dtype can wrap around. Either would silently merge distinct events.
    run = numpy.asarray(data['run']).astype(numpy.int64)
    event = numpy.asarray(data['event']).astype(numpy.int64)

    if run.size == 0:
        # Nothing to label; avoid calling max() on an empty array.
        data['event_id'] = numpy.empty(0, dtype=numpy.int64)
        return

    # Fold both numbers into one key that is unique per (run, event) pair
    # (this relies on run numbers being non-negative).
    groups = run + event * (run.max() + 1)
    lookup, groups = numpy.unique(groups, return_inverse=True)
    # Fixed seed: the same input always yields the same id assignment.
    permutation = numpy.random.RandomState(42).permutation(len(lookup))
    data['event_id'] = permutation[groups]
In [5]:
# Label every MC track with its per-(run, event) identifier; adds the 'event_id' column in place.
add_event_id(data)
In [6]:
# Group the MC table by 'event_id' (astropy's group_by returns a new table ordered by that key).
data = data.group_by('event_id')
In [7]:
# Write the grouped MC table to a ROOT file.
# mode='recreate' replaces any existing file; the default mode ('update') would append to the
# tree left by a previous execution and duplicate every row when this notebook is re-run.
root_numpy.array2root(numpy.array(data), './Bcharged_MC.root', mode='recreate')
In [ ]:
# Load the real-data track ntuple into an astropy Table.
# NOTE(review): unlike the MC table, no finite-'IPs' filter is applied here - confirm this is intended.
real_data = Table(root_numpy.root2array('../datasets/data/csv/WG/Bu_JPsiK/2012/Tracks.root'))
In [ ]:
# Label every real-data track with its per-(run, event) identifier; adds the 'event_id' column in place.
add_event_id(real_data)
In [ ]:
# Group the real-data table by 'event_id' (astropy's group_by returns a new table ordered by that key).
real_data = real_data.group_by('event_id')
In [ ]:
# Write the grouped real-data table to a ROOT file.
# mode='recreate' replaces any existing file; the default mode ('update') would append to the
# tree left by a previous execution and duplicate every row when this notebook is re-run.
root_numpy.array2root(numpy.array(real_data), './Bcharged_data.root', mode='recreate')