In [1]:
%pylab inline
In [2]:
import numpy
import pandas
import root_numpy
In [3]:
import sys
sys.path.insert(0, "../")
In [4]:
selection = " ((PIDNNk > 0) | (PIDNNm > 0) | (PIDNNe > 0) | (PIDNNpi > 0) | (PIDNNp > 0)) & (ghostProb < 0.4)"
In [5]:
from itertools import combinations
from utils import add_diff_pt, get_events_statistics
def add_features(data, event_id_column='event_id'):
event_id = data.run.apply(str) + '_' + data.event.apply(int).apply(str)
data['group_column'] = numpy.unique(event_id, return_inverse=True)[1]
# all weights are 1, because this is MC
data['N_sig_sw'] = 1
data[event_id_column] = event_id
add_diff_pt(data)
# add cos(diff_phi)
data.loc[:, 'cos_diff_phi'] = numpy.cos(data.diff_phi.values)
PIDs = {'k': data.PIDNNk.values,
'e': data.PIDNNe.values,
'mu': data.PIDNNm.values,
}
for (pid_name1, pid_values1), (pid_name2, pid_values2) in combinations(PIDs.items(), 2):
data.loc[:, 'max_PID_{}_{}'.format(pid_name1, pid_name2)] = numpy.maximum(pid_values1, pid_values2)
data.loc[:, 'sum_PID_{}_{}'.format(pid_name1, pid_name2)] = pid_values1 + pid_values2
data['label'] = data.signB.values * data.signTrack.values > 0
In [6]:
data = pandas.read_csv('../datasets/data/csv/WG/Bd_JPsiKstar/2012/Tracks.csv', sep='\t')
data = data.query('(Bmass < 5.4) & (Bmass > 5.16)')
add_features(data)
data.loc[~numpy.isfinite(data['IPs'].values), 'IPs'] = -999
data.loc[~numpy.isfinite(data['IPerr'].values), 'IPerr'] = -999
get_events_statistics(data)
Out[6]:
In [7]:
splot = pandas.read_csv('../datasets/data/csv/WG/Bd_JPsiKstar/2012/sPlot_Bd_JPsiKstar.csv',
sep=' ', header=None).drop([8], axis=1)
splot.columns = ['run', 'evTop', 'evBottom', 'Bmass', 'Nbkg_tot_sw', 'L_Nbkg_tot', 'Nsig_tot_sw', 'L_Nsig_tot']
event_id_column = 'event_id'
splot[event_id_column] = splot.run.apply(str) + '_' + (splot.evTop * 1e6 + splot.evBottom).apply(int).apply(str)
index = numpy.unique(splot[event_id_column], return_index=True)[1]
splot.index = numpy.arange(len(splot))
splot = splot.ix[index, :]
In [8]:
splot.index = splot[event_id_column]
data.index = data[event_id_column]
In [9]:
splot.index = splot[event_id_column]
data.index = data[event_id_column]
start = 0
end = 1000000
sweight = []
for batch in range(33):
if end > len(data):
end = len(data)
sweight.append(splot.loc[data.index[start:end], 'Nsig_tot_sw'].values)
start = end
end += 1000000
In [10]:
sweight = numpy.concatenate(sweight)
data.N_sig_sw = sweight
In [11]:
N_B_events = numpy.sum(splot.Nsig_tot_sw)
N_B_events
Out[11]:
In [12]:
data = data.query(selection)
data = data.query("(time >= 0.2) & (time <= 8)")
In [13]:
import cPickle
with open('../models/dt_WG_2012.pkl', 'r') as f:
tt_folding = cPickle.load(f)
with open('../models/calibrator_tracks_WG_2012.pkl', 'r') as f:
calibrator_tracks = cPickle.load(f)
with open('../models/calibrator_B_WG_2012.pkl', 'r') as f:
calibrator_B = cPickle.load(f)
with open('../models/loss_dt/dt_ss_os.pkl', 'r') as f:
tt_os_ss = cPickle.load(f)
with open('../models/loss_dt/os_ss_calibrator.pkl', 'r') as f:
calibrator_os_ss = cPickle.load(f)
In [14]:
import cPickle
with open('../models/dt_kstar_MC.pkl', 'r') as f:
tt_folding_MC = cPickle.load(f)
with open('../models/calibrator_tracks_kstar_MC.pkl', 'r') as f:
calibrator_tracks_MC = cPickle.load(f)
with open('../models/calibrator_B_kstar_MC.pkl', 'r') as f:
calibrator_B_MC = cPickle.load(f)
In [15]:
import cPickle
with open('../models/dt_MC.pkl', 'r') as f:
tt_folding_MC_k = cPickle.load(f)
with open('../models/calibrator_tracks_MC.pkl', 'r') as f:
calibrator_tracks_MC_k = cPickle.load(f)
with open('../models/calibrator_B_MC.pkl', 'r') as f:
calibrator_B_MC_k = cPickle.load(f)
In [16]:
from utils import compute_B_prob_using_part_prob
part_probs = tt_folding.predict_proba(data)[:, 1]
prior = tt_os_ss.predict_proba(data)[:, 1]
part_probs_calib = calibrator_tracks.predict_proba(part_probs)
prior_calib = calibrator_os_ss.predict_proba(prior)
In [17]:
hist(prior_calib, bins=100, weights=data.N_sig_sw.values);
In [18]:
part_probs_union_pid = part_probs_calib.copy()
mask = data.PIDNNpi.values > 0.8
part_probs_union_pid[mask] = 1 - part_probs_union_pid[mask]
In [19]:
part_probs_union = prior_calib*(1 - part_probs_calib) + (1-prior_calib)*part_probs_calib
In [20]:
Bsign, Bweight, Bprobs, Bevent = compute_B_prob_using_part_prob(data, part_probs_union,
sign_part_column='signTrack')
Bprobs_calib = calibrator_B.predict_proba(Bprobs)
mistag = numpy.minimum(Bprobs_calib, 1 - Bprobs_calib)
tag = numpy.ones(len(Bprobs_calib))
tag[Bprobs_calib < 0.5] = -1
In [21]:
Bsign_pid, Bweight_pid, Bprobs_pid, Bevent_pid = compute_B_prob_using_part_prob(data, part_probs_union_pid,
sign_part_column='signTrack')
Bprobs_calib_pid = calibrator_B.predict_proba(Bprobs_pid)
mistag_pid = numpy.minimum(Bprobs_calib_pid, 1 - Bprobs_calib_pid)
tag_pid = numpy.ones(len(Bprobs_calib_pid))
tag_pid[Bprobs_calib_pid < 0.5] = -1
In [22]:
from utils import compute_B_prob_using_part_prob
part_probs_mc = tt_folding_MC.predict_proba(data)[:, 1]
part_probs_calib_mc = calibrator_tracks_MC.predict_proba(part_probs_mc)
Bsign_mc, Bweight_mc, Bprobs_mc, Bevent_mc = compute_B_prob_using_part_prob(data, part_probs_calib_mc,
sign_part_column='signTrack')
Bprobs_calib_mc = calibrator_B_MC.predict_proba(Bprobs_mc)
mistag_mc = numpy.minimum(Bprobs_calib_mc, 1 - Bprobs_calib_mc)
tag_mc = numpy.ones(len(Bprobs_calib_mc))
tag_mc[Bprobs_calib_mc < 0.5] = -1
In [23]:
from utils import compute_B_prob_using_part_prob
part_probs_mc_k = tt_folding_MC_k.predict_proba(data)[:, 1]
part_probs_calib_mc_k = calibrator_tracks_MC_k.predict_proba(part_probs_mc_k)
part_probs_union_mc_k = prior_calib*(1 - part_probs_calib_mc_k) + (1-prior_calib)*part_probs_calib_mc_k
Bsign_mc_k, Bweight_mc_k, Bprobs_mc_k, Bevent_mc_k = compute_B_prob_using_part_prob(data, part_probs_union_mc_k,
sign_part_column='signTrack')
Bprobs_calib_mc_k = calibrator_B_MC_k.predict_proba(Bprobs_mc_k)
mistag_mc_k = numpy.minimum(Bprobs_calib_mc_k, 1 - Bprobs_calib_mc_k)
tag_mc_k = numpy.ones(len(Bprobs_calib_mc_k))
tag_mc_k[Bprobs_calib_mc_k < 0.5] = -1
In [24]:
ids = numpy.unique(data.event_id, return_index=True)[1]
data.index = numpy.arange(len(data))
Bmass = data.ix[ids, 'Bmass'].values
Btime = data.ix[ids, 'time'].values
In [25]:
hist(Btime, bins=100);
In [26]:
assert sum(data.ix[ids, 'event_id'] != Bevent) == 0
In [27]:
from utils import get_events_number
N_B_passed = float(get_events_number(data))
tagging_efficiency = N_B_passed / N_B_events
tagging_efficiency_delta = sqrt(tagging_efficiency * (1 - tagging_efficiency) * \
numpy.sum(splot.Nsig_tot_sw.values ** 2)) / N_B_events
tagging_efficiency, tagging_efficiency_delta
Out[27]:
In [49]:
prepared_data = pandas.DataFrame({'tag': numpy.array(tag, dtype='int32'),
'mistag': mistag,
'flavour': numpy.array(Bsign, dtype='int32'),
'decay_time': Btime,
'Bmass': Bmass,
'resolution': [50*1e-3] * len(Bmass),
'probs': Bprobs_calib,
'weight': Bweight,
'mistag_uncalib': numpy.minimum(Bprobs, 1 - Bprobs),
'tag_uncalib': numpy.where(Bprobs > 0.5, 1, -1)
})
root_numpy.array2root(prepared_data.to_records(index=False), "../for_epm/kstar.root",
mode='recreate')
In [37]:
numpy.average((1 - 2*Bprobs_calib)**2, weights=Bweight)
Out[37]:
In [38]:
prepared_data = pandas.DataFrame({'tag': numpy.array(tag_pid, dtype='int32'),
'mistag': mistag_pid,
'flavour': numpy.array(Bsign_pid, dtype='int32'),
'decay_time': Btime,
'Bmass': Bmass,
'resolution': [50*1e-3] * len(Bmass),
'probs': Bprobs_calib_pid,
'weight': Bweight_pid,
'mistag_uncalib': numpy.minimum(Bprobs_pid, 1 - Bprobs_pid)
})
root_numpy.array2root(prepared_data.to_records(index=False), "../for_epm/kstar_pid_inverting.root",
mode='recreate')
In [39]:
numpy.average((1 - 2*Bprobs_calib_pid)**2, weights=Bweight_pid)
Out[39]:
In [40]:
prepared_data = pandas.DataFrame({'tag': numpy.array(tag_mc, dtype='int32'),
'mistag': mistag_mc,
'flavour': numpy.array(Bsign_mc, dtype='int32'),
'decay_time': Btime,
'Bmass': Bmass,
'resolution': [50*1e-3] * len(Bmass),
'probs': Bprobs_calib_mc,
'weight': Bweight_mc,
'mistag_uncalib': numpy.minimum(Bprobs_mc, 1 - Bprobs_mc)
})
root_numpy.array2root(prepared_data.to_records(index=False), "../for_epm/kstar_mc_kstar.root",
mode='recreate')
In [41]:
numpy.average((1 - 2*Bprobs_calib_mc)**2, weights=Bweight_mc)
Out[41]:
In [42]:
prepared_data = pandas.DataFrame({'tag': numpy.array(tag_mc_k, dtype='int32'),
'mistag': mistag_mc_k,
'flavour': numpy.array(Bsign_mc_k, dtype='int32'),
'decay_time': Btime,
'Bmass': Bmass,
'resolution': [50*1e-3] * len(Bmass),
'probs': Bprobs_calib_mc_k,
'weight': Bweight_mc_k,
'mistag_uncalib': numpy.minimum(Bprobs_mc_k, 1 - Bprobs_mc_k)
})
root_numpy.array2root(prepared_data.to_records(index=False), "../for_epm/kstar_mc_k.root",
mode='recreate')
In [43]:
numpy.average((1 - 2*Bprobs_calib_mc_k)**2, weights=Bweight_mc_k)
Out[43]:
In [ ]: