# Small end-to-end run: up to 1000 events per sample are written to
# output.csv, which is then plotted and preprocessed.

# Remove outputs of any previous run. -f keeps a first run (where the files
# do not exist yet) from printing an error and returning non-zero.
rm -f -- output.csv
rm -f -- output_preprocessed.csv

# ttH sample, class label 1.
./ttHbb_ROOT_file_to_CSV_file.py \
  --fileroot=output_ttH.root \
  --classlabel=1 \
  --filecsv=output.csv \
  --maxevents=1000 \
  --headings=true

# ttbb sample, class label 0, sent to the same CSV file.
# NOTE(review): both calls pass --headings=true for the same --filecsv;
# confirm the converter does not emit a second header row.
./ttHbb_ROOT_file_to_CSV_file.py \
  --fileroot=output_ttbb.root \
  --classlabel=0 \
  --filecsv=output.csv \
  --maxevents=1000 \
  --headings=true

# Plots of the raw CSV: histogram comparisons and scatter matrix enabled.
./ttHbb_plots_of_CSV.py \
  --infile=output.csv \
  --histogramcomparisons=true \
  --scattermatrix=true

# Preprocessed copy of the CSV.
./ttHbb_preprocess_CSV_file.py \
  --infile=output.csv \
  --outfile=output_preprocessed.csv
# Full run on the grid n-tuples: examine both ROOT files, write up to
# ${number_events} events per sample to data.csv, preprocess it, and plot
# both the raw and the preprocessed CSV.

# Remove outputs of any previous run. -f keeps a first run (where the files
# do not exist yet) from printing an error and returning non-zero.
rm -f -- data.csv
rm -f -- data_preprocessed.csv

filename_ttH="ttH_group.phys-higgs.10205167._000002.out.root"
filename_ttbb="ttbb_group.phys-higgs.10205185._000001.out.root"
number_events="25000"

# Examine each input file before converting it.
./ttHbb_examine_ROOT_file.py --fileroot="${filename_ttH}"
./ttHbb_examine_ROOT_file.py --fileroot="${filename_ttbb}"

# ttH sample, class label 1.
./ttHbb_ROOT_file_to_CSV_file.py \
  --fileroot="${filename_ttH}" \
  --classlabel=1 \
  --filecsv=data.csv \
  --maxevents="${number_events}" \
  --headings=true

# ttbb sample, class label 0, sent to the same CSV file.
# NOTE(review): both calls pass --headings=true for the same --filecsv;
# confirm the converter does not emit a second header row.
./ttHbb_ROOT_file_to_CSV_file.py \
  --fileroot="${filename_ttbb}" \
  --classlabel=0 \
  --filecsv=data.csv \
  --maxevents="${number_events}" \
  --headings=true

# Preprocessed copy of the CSV.
./ttHbb_preprocess_CSV_file.py \
  --infile=data.csv \
  --outfile=data_preprocessed.csv

# Histogram comparisons only (scatter matrix disabled), one plot directory
# per CSV so raw and preprocessed plots do not overwrite each other.
./ttHbb_plots_of_CSV.py \
  --infile=data.csv \
  --histogramcomparisons=true \
  --scattermatrix=false \
  --directoryplots=plots_raw
./ttHbb_plots_of_CSV.py \
  --infile=data_preprocessed.csv \
  --histogramcomparisons=true \
  --scattermatrix=false \
  --directoryplots=plots_preprocessed
# Reduced run (2500 events per sample) on the same grid n-tuples: write
# data.csv, preprocess it, and plot both the raw and the preprocessed CSV.

# Remove outputs of any previous run. -f keeps a first run (where the files
# do not exist yet) from printing an error and returning non-zero.
rm -f -- data.csv
rm -f -- data_preprocessed.csv

filename_ttH="ttH_group.phys-higgs.10205167._000002.out.root"
filename_ttbb="ttbb_group.phys-higgs.10205185._000001.out.root"
number_events="2500"

# Examination of the input files is disabled here; uncomment to run it.
#./ttHbb_examine_ROOT_file.py --fileroot="${filename_ttH}"
#./ttHbb_examine_ROOT_file.py --fileroot="${filename_ttbb}"

# ttH sample, class label 1.
./ttHbb_ROOT_file_to_CSV_file.py \
  --fileroot="${filename_ttH}" \
  --classlabel=1 \
  --filecsv=data.csv \
  --maxevents="${number_events}" \
  --headings=true

# ttbb sample, class label 0, sent to the same CSV file.
# NOTE(review): both calls pass --headings=true for the same --filecsv;
# confirm the converter does not emit a second header row.
./ttHbb_ROOT_file_to_CSV_file.py \
  --fileroot="${filename_ttbb}" \
  --classlabel=0 \
  --filecsv=data.csv \
  --maxevents="${number_events}" \
  --headings=true

# Preprocessed copy of the CSV.
./ttHbb_preprocess_CSV_file.py \
  --infile=data.csv \
  --outfile=data_preprocessed.csv

# Histogram comparisons only (scatter matrix disabled), one plot directory
# per CSV.
./ttHbb_plots_of_CSV.py \
  --infile=data.csv \
  --histogramcomparisons=true \
  --scattermatrix=false \
  --directoryplots=plots_raw
./ttHbb_plots_of_CSV.py \
  --infile=data_preprocessed.csv \
  --histogramcomparisons=true \
  --scattermatrix=false \
  --directoryplots=plots_preprocessed
Some terminology used is defined as follows:
term | description |
---|---|
${jj}$ pair | a system made using any two jets |
${bb}$ pair | a system made using two ${b}$-tagged jets |
${bj}$ pair | a system made using one ${b}$-tagged jet and any other jet |
${uu}$ pair | a system made using any two untagged jets |
variable | description |
---|---|
nJets | total number of jets |
nBTags | total number of jets passing chosen ${b}$-tagging criteria set in cuts file |
nPrimaryVtx | total number of primary vertices |
nElectrons | total number of electrons |
nMuons | total number of muons |
variable | description |
---|---|
truthHiggsDaughtersID | vector of ints storing the PDGIDs of the two decay products of the Higgs |
truthHiggsWDaughtersID | vector of ints storing the PDGIDs of the decay products of the ${W}$ daughters of the Higgs (filled only for ${H\to WW}$ events) |
truthHiggsZDaughtersID | vector of ints storing the PDGIDs of the decay products of the ${Z}$ daughters of the Higgs (filled only for ${H\to ZZ}$ events) |
variable | description |
---|---|
Centrality_all | centrality calculated using all jets and leptons |
Mbb_MindR | mass of ${bb}$ pair with minimum ${dR}$ |
dRbb_MaxPt | ${dR}$ of ${bb}$ pair with maximum ${p_{T}}$ |
Mjj_MaxPt | mass of ${jj}$ pair with maximum ${p_{T}}$ |
pT_jet5 | ${p_{T}}$ of fifth jet (ordered by ${b}$-tagged jets and then non ${b}$-tagged jets) |
H1_all | second Fox-Wolfram moment calculated using all jets and leptons |
dRbb_avg | average ${dR}$ of all ${bb}$ pairs |
Mbj_MaxPt | mass of ${b}$ jet and untagged jet pair with maximum ${p_{T}}$ |
dRlepbb_MindR | ${dR}$ of lepton and the ${bb}$ pair with smallest ${dR}$ |
Muu_MindR | mass of untagged jets pair with minimum ${dR}$ |
Aplan_bjets | aplanarity calculated using only ${b}$-tagged jets |
Njet_pt40 | number of jets with ${p_{T}>40\textrm{ GeV}}$ |
Mbj_MindR | mass of ${b}$ jet and untagged jet pair with minimum ${dR}$ |
HT_jets | scalar sum of ${ET}$ calculated using all jets |
Mjj_MindR | mass of ${jj}$ pair with minimum ${dR}$ |
Mbb_MaxPt | mass of ${bb}$ pair with maximum ${p_{T}}$ |
pTuu_MindR | ${p_{T}}$ of untagged jets pair with minimum ${dR}$ |
Mbb_MaxM | mass of ${bb}$ pair with maximum mass |
dRuu_MindR | minimum ${dR}$ between two untagged jets |
Mjjj_MaxPt | mass of jet triplet with maximum ${p_{T}}$ |
variable | description |
---|---|
Centrality_all | centrality calculated using all jets and leptons |
Mbb_MindR | mass of ${bb}$ pair with minimum ${dR}$ |
dRbb_MaxPt | ${dR}$ of ${bb}$ pair with maximum ${p_{T}}$ |
Mjj_MaxPt | mass of ${jj}$ pair with maximum ${p_{T}}$ |
dEtajj_MaxdEta | maximum ${d\eta}$ between any two jets |
MHiggs | mass of Higgs candidate system |
dRHl_MindR | smallest ${dR}$ between Higgs candidate and a lepton |
NHiggs_30 | number of Higgs candidates within 30 GeV of Higgs mass (125 GeV) |
Aplan_jets | aplanarity calculated using all jets |
Mjj_MinM | minimum mass of any two jets |
dRHl_MaxdR | maximum ${dR}$ between Higgs candidate and a lepton |
Mjj_HiggsMass | mass of jet pair closest to Higgs mass (125 GeV) |
HT_all | scalar sum of ${ET}$ of all jets and leptons |
dRbb_MaxM | ${dR}$ of ${bb}$ pair with maximum mass |
dRlj_MindR | minimum ${dR}$ between a lepton and a jet |
H4_all | fifth Fox-Wolfram moment of all jets and leptons |
pT_jet3 | ${p_{T}}$ of 3rd jet (${b}$ jets first, then untagged jets) |
variable | description |
---|---|
Mll | dilepton invariant mass |
pTll | ${p_{T}}$ of dilepton system |
weight_dil_fakes_shape | systematic weight for reweight to same sign data |
variable | description |
---|---|
el_LHLoose | vector of whether electron passes LHLoose ID |
el_LHMedium | vector of whether electron passes LHMedium ID |
el_LHTight | vector of whether electron passes LHTight ID |
el_isoGradient | vector of whether electron passes gradient isolation |
el_isoGradientLoose | vector of whether electron passes gradient loose isolation |
el_isoTight | vector of whether electron passes tight isolation |
el_isoLoose | vector of whether electron passes loose isolation |
el_isoLooseTrackOnly | vector of whether electron passes loose track only isolation |
mu_Tight | vector of whether muon passes tight quality |
mu_Medium | vector of whether muon passes medium quality |
mu_Loose | vector of whether muon passes loose quality |
mu_VeryLoose | vector of whether muon passes very loose quality |
mu_isoGradient | vector of whether muon passes gradient isolation |
mu_isoGradientLoose | vector of whether muon passes gradient loose isolation |
mu_isoTight | vector of whether muon passes tight isolation |
mu_isoLoose | vector of whether muon passes loose isolation |
mu_isoLooseTrackOnly | vector of whether muon passes loose track only isolation |
variable | description |
---|---|
TopHeavyFlavorFilterFlag | ${t\bar{t}}$ + heavy flavour filter decision |
variable | description |
---|---|
semilepMVAreco_higgs_mass | Higgs mass from reconstruction |
semilepMVAreco_bbhiggs_dR | ${dR}$ of ${bb}$ pair from Higgs from reconstruction |
semilepMVAreco_BDT_output | highest reco. BDT output |
semilepMVAreco_BDT_withH_output | highest reco. BDT output trained with Higgs variables |
semilepMVAreco_Ncombinations | number of jets combinations |
semilepMVAreco_nuApprox_recoBDT | neutrino approximation type used in the good combination from the reco. BDT: positiveSolution = 0, negativeSolution = 1, realonlySolution = 2 |
semilepMVAreco_nuApprox_recoBDT_withH | neutrino approximation type used in the good combination from the recoBDT_withH |
jet_semilepMVAreco_recoBDT_cand | jet decorated with partons candidate from reco. BDT: Nondefined = 0, b1fromHiggs = 1, b2fromHiggs = 2, bfromlepTop = 3, bfromhadTop = 4, q1fromW = 5, q2fromW = 6 |
jet_semilepMVA_recoBDT_withH_cand | jet decorated with partons candidate from reco. BDT trained with Higgs variables |
variable | description |
---|---|
ClassifBDTOutput_basic | BDT output with Run 1 variables |
ClassifBDTOutput_withReco_basic | BDT output with Run 1 + reco. variables |
variable | description |
---|---|
dileptonRecoBDT | BDT output from the best scoring combination in each event |
dileptonRecoBDTnoHiggs | BDT output from the best scoring combination in each event in the no Higgs BDT |
jet_dilepRecoMatch | bit-encoded matching for each jet in the event corresponding to the best combination |
jet_dilepRecoMatch_BDTnoHiggs | bit-encoded matching for each jet in the event corresponding to the best combination without Higgs information |
with dileptonMVAreco_variables set to true:
variable | definition |
---|---|
dilepReco_Mass_Higgs | invariant mass of the jets matched to the Higgs |
dilepReco_dEtaHiggsl_mindEta | minimum ${d\eta}$ between the reconstructed Higgs and a lepton |
dilepReco_dEtaHiggsl_maxdEta | maximum ${d\eta}$ between the reconstructed Higgs and a lepton |
dilepReco_dEtaHiggsbtop_mindEta | minimum ${d\eta}$ between the reconstructed Higgs and a jet matched to a ${b}$ from a ${t}$ |
dilepReco_dEtaHiggsbtop_maxdEta | maximum ${d\eta}$ between the reconstructed Higgs and a jet matched to a ${b}$ from a ${t}$ |
With dileptonMVAreco_bTagWPs set to more than one, each variable has the working point appended to the end and is saved for each working point.
variable | definition |
---|---|
dileptonClassifBDT | BDT output for the event if in the ${\geq 4j \geq 4b}$ or ${\geq 4j3b}$ region |
dileptonClassifBDT_noReco | BDT output for the event if in the ${\geq 4j \geq 4b}$ or ${\geq 4j3b}$ region for the BDT not using reco. variables |
variable | definition |
---|---|
dEtabb_Avg | average ${\Delta\eta}$ between two ${b}$ jets |
dRbb_MaxPt | ${\Delta R}$ of the two ${b}$ jets with max ${p_{T}}$ |
Mbb_HiggsMass | mass of two ${b}$ jets closest to Higgs mass (125 GeV) |
dRbb_HiggsMass | ${\Delta R}$ of two ${b}$ jets closest to Higgs mass |
nHiggsbb30 | number of ${b\bar{b}}$ pairs within 30 GeV of Higgs mass |
dEtajl_MaxdEta | maximum ${\Delta\eta}$ between any jet and a lepton |
H0_all | first Fox-Wolfram moment calculated with all objects |
Mbb_MinM | mass of ${b\bar{b}}$ pair with minimum mass |
Aplanarity_jets | aplanarity of jets |
Centrality_all | centrality of all objects |
Mbb_MindR | mass of ${b\bar{b}}$ pair with minimum ${\Delta R}$ |
dEtabb_MaxdEta | maximum ${\Delta\eta}$ between two ${b}$ jets |
dEtajj_Avg | average ${\Delta\eta}$ between jets |
dRbb_MaxM | ${\Delta R}$ between ${b\bar{b}}$ pair with maximum mass |
nHiggsjj30 | number of jj pairs within 30 GeV of Higgs mass |
HT_all | scalar ${p_{T}}$ sum of all objects |
H2_jets | third Fox-Wolfram moment calculated using jets |
If TRF is used, these outputs are created in the 4 inclusive ${b}$ and 3 exclusive ${b}$ analysis regions as separate branches, but with the direct tagging variables still saved.
The current resolved BDT is TTHClassBDTOutput_withBTag_inclusive and the current boosted BDT is TTHBOOSTEDBDTOutput_RC_8Var_xTrained.
For each signal region, information from the output of the reconstruction BDT (which assigns jets to the final partons from which they originated) is combined with other kinematic variables in a classification BDT using ${t\bar{t}H\left(H\to b\bar{b}\right)}$ as signal and ${t\bar{t}}$ as background.
The classification BDT is built separately for events with exactly 5 jets or at least 6 jets. It was found that an inclusive training on events with at least four ${b}$-tagged jets at an efficiency of 85% performed as well as dedicated trainings in each specific signal region, on the condition that MV2c10 ${b}$-tagging variables of some of the jets are included in the training. For simplicity, only inclusive trainings are used, trained in the (>= 6j, >= 4b) and (5j, >= 4b) regions at 85% ${b}$-tagging efficiency.
Some of the variables used rely on which jets are considered ${b}$-tagged. In this inclusive training approach, mixing together regions of various tightness in ${b}$-tagging, a unified definition was chosen: such variables (${\Delta R^{\text{avg}}_{bb}}$, ${\Delta R^{\text{max} p_{T}}_{bb}}$, ${m^{\text{min} \Delta R}_{bb}}$, ${N^{\text{Higgs}}_{30}}$ and ${\Delta R^{\text{min}\Delta R}_{\text{lep}-bb}}$) are computed only from the four leading jets sorted in MV2c10 ${b}$-tagging weight.
The training parameters for the TMVA BDT are as follows:
parameter | value |
---|---|
BoostType | AdaBoost |
AdaBoostBeta | 0.15 |
NTrees | 250 |
MaxDepth | 5 |
nCuts | 80 |
MinNodeSize | 4% |
In order to maximize the size of the training sample, the absolute value of the event weight is taken, so that events with negative weights are used. Input variables are in agreement between events with positive and negative weights. The performance is similar when assessed on the full sample with positive and negative weights, the sample with absolute weights, and the sample with only events with positive weight.
The variables used in these BDTs are as follows:
general kinematic variables:
variable | n-tuple name | description | >= 6j | 5j |
---|---|---|---|---|
${\Delta R^{\text{avg}}_{bb}}$ | dRbb_avg_Sort4 | average ${\Delta R}$ for all ${b}$-tagged jet pairs | yes | yes |
${\Delta R^{\text{max} p_{T}}_{bb}}$ | dRbb_MaxPt_Sort4 | ${\Delta R}$ between the two ${b}$-tagged jets with the largest vector sum ${p_{T}}$ | yes | - |
${\Delta \eta^{\textrm{max}\Delta\eta}_{jj}}$ | dEtajj_MaxdEta | maximum ${\Delta\eta}$ between any two jets | yes | yes |
${m^{\text{min} \Delta R}_{bb}}$ | Mbb_MindR_Sort4 | mass of the combination of the two ${b}$-tagged jets with the smallest ${\Delta R}$ | yes | - |
${m^{\text{min} \Delta R}_{jj}}$ | | mass of the combination of any two jets with the smallest ${\Delta R}$ | - | yes |
${p^{\text{jet} 5}_{T}}$ | pT_jet5 | ${p_{T}}$ of the fifth leading jet | yes | yes |
${N^{\text{Higgs}}_{30}}$ | nHiggsbb30_Sort4 | number of ${b}$-jet pairs with invariant mass within 30 GeV of the Higgs boson mass | yes | yes |
${H^{\text{had}}_{T}}$ | | scalar sum of jet ${p_{T}}$ | - | yes |
${\Delta R^{\text{min}\Delta R}_{\text{lep}-bb}}$ | | ${\Delta R}$ between the lepton and the combination of the two ${b}$-tagged jets with the smallest ${\Delta R}$ | - | yes |
aplanarity | Aplanarity_jets | ${1.5\lambda_{2}}$, where ${\lambda_{2}}$ is the second eigenvalue of the momentum tensor built with all jets | yes | yes |
centrality | Centrality_all | the scalar sum of the ${p_{T}}$ divided by the sum of the ${E}$ for all jets and the lepton | yes | yes |
${H1}$ | H1_all | second Fox-Wolfram moment computed using all jets and the lepton | yes | yes |
variables from reconstruction BDT output:
variable | n-tuple name | description | >=6j | 5j |
---|---|---|---|---|
BDT | | BDT output | yes* | yes* |
${m_{H}}$ | TTHReco_T85_best_Higgs_mass | Higgs boson mass | yes | yes |
${m_{H,b_{\text{lep top}}}}$ | TTHReco_T85_best_Higgsleptop_mass | mass of Higgs boson and ${b}$-jet from leptonic top | yes | - |
${\Delta R_{\text{Higgs }bb}}$ | TTHReco_T85_best_bbHiggs_dR | ${\Delta R}$ between ${b}$-jets from the Higgs boson | yes | yes |
${\Delta R_{H,t\bar{t}}}$ | TTHReco_T85_withH_best_Higgsttbar_dR | ${\Delta R}$ between Higgs boson and ${t\bar{t}}$ system | yes* | yes* |
${\Delta R_{H,\text{lep top}}}$ | TTHReco_T85_best_Higgsleptop_dR | ${\Delta R}$ between Higgs boson and leptonic ${t}$ | yes | - |
${\Delta R_{H,b_{\text{had top}}}}$ | TTHReco_T85_withH_best_TTHReco_T85_withH ? | ${\Delta R}$ between Higgs boson and ${b}$-jet from hadronic ${t}$ | - | yes* |
For variables from the reconstruction BDT, those with an asterisk are from the BDT using Higgs boson information while those with no asterisk are from the BDT without Higgs boson information.
variables from likelihood calculation:
variable | n-tuple name | description | >=6j | 5j |
---|---|---|---|---|
D | LHD_Discriminant | likelihood discriminant | yes | yes |
variables from ${b}$-tagging:
variable | n-tuple name | description | >=6j | 5j |
---|---|---|---|---|
${w^{H}_{b}}$ | TTHReco_T85_best_bbHiggs_tagWeightBin_sum | sum of binned ${b}$-tagging weights of jets | yes | yes |
${B_{j^{3}}}$ | jet_mv2_order_3_tagWeightBin | third jet binned ${b}$-tagging weight (sorted by weight) | yes | yes |
${B_{j^{4}}}$ | jet_mv2_order_4_tagWeightBin | fourth jet binned ${b}$-tagging weight (sorted by weight) | yes | yes |
${B_{j^{5}}}$ | jet_mv2_order_5_tagWeightBin | fifth jet binned ${b}$-tagging weight (sorted by weight) | yes | yes |
variable | imputation label |
---|---|
Centrality_all | -9 |
ClassifBDTOutput_6jsplit | -9.0 |
ClassifBDTOutput_basic | -9.0 |
ClassifBDTOutput_withReco_6jsplit | -9.0 |
ClassifBDTOutput_withReco_basic | -9.0 |
dEtajj_MaxdEta | -1.0 |
dRbb_avg | 0.0 |
dRbb_MaxM | -1.0 |
dRbb_MaxPt | 0.0 |
dRbb_min | -9.0 |
dRbj_Wmass | 0.0 |
dRHl_MaxdR | -1.0 |
dRHl_MindR | -1.0 |
dRlj_MindR | -1.0 |
dRuu_MindR | 0.0 |
H4_all | -9.0 |
HiggsbbM | -9.0 |
HT_all | -1.0 |
Mbb_MaxM | 0.0 |
Mbb_MaxPt | 0.0 |
Mbb_MindR | 0.0 |
Mbj_MaxPt | 0.0 |
Mbj_MindR | 0.0 |
Mbj_Wmass | 0.0 |
MHiggs | -1.0 |
Mjj_HiggsMass | -1.0 |
Mjj_MinM | -1.0 |
NBFricoNN_dil | -2.0 |
NBFricoNN_ljets | -2.0 |
NHiggs_30 | -1 |
pT_jet3 | -1.0 |
pT_jet5 | -1.0 |
pTuu_MindR | 0.0 |
SecondLjetM | -1.0 |
SecondLjetPt | -1.0 |
semilepMVAreco_b1higgsbhadtop_dR | -9.0 |
semilepMVAreco_bbhiggs_dR | -9.0 |
semilepMVAreco_BDT_output | -9.0 |
semilepMVAreco_BDT_output_6jsplit | -9.0 |
semilepMVAreco_BDT_output_truthMatchPattern | -9 |
semilepMVAreco_BDT_withH_output | -9.0 |
semilepMVAreco_BDT_withH_output_6jsplit | -9.0 |
semilepMVAreco_BDT_withH_output_truthMatchPattern | -9 |
semilepMVAreco_hadWb1Higgs_mass | -9.0 |
semilepMVAreco_higgsbhadtop_withH_dR | -9.0 |
semilepMVAreco_higgsbleptop_mass | -9.0 |
semilepMVAreco_higgsbleptop_withH_dR | -9.0 |
semilepMVAreco_higgslep_dR | -9.0 |
semilepMVAreco_higgsleptop_dR | -9.0 |
semilepMVAreco_higgs_mass | -9.0 |
semilepMVAreco_higgsq1hadW_mass | -9.0 |
semilepMVAreco_higgsttbar_withH_dR | -9.0 |
semilepMVAreco_leptophadtop_dR | -9.0 |
semilepMVAreco_leptophadtop_withH_dR | -9.0 |
semilepMVAreco_Ncombinations | -9 |
semilepMVAreco_nuApprox_recoBDT | -9 |
semilepMVAreco_nuApprox_recoBDT_6jsplit | -9 |
semilepMVAreco_nuApprox_recoBDT_withH | -9 |
semilepMVAreco_nuApprox_recoBDT_withH_6jsplit | -9 |
semilepMVAreco_ttH_Ht_withH | -9.0 |
ttHF_mva_discriminant | -3.0 |
jet1_semilepMVAreco_recoBDT_cand | 0 |
jet1_semilepMVAreco_recoBDT_cand_6jsplit | 0 |
jet1_semilepMVAreco_recoBDT_withH_cand | 0 |
jet1_semilepMVAreco_recoBDT_withH_cand_6jsplit | 0 |
jet2_jvt | 0.0 |
jet2_semilepMVAreco_recoBDT_cand | 0 |
jet2_semilepMVAreco_recoBDT_cand_6jsplit | 0 |
jet2_semilepMVAreco_recoBDT_withH_cand | 0 |
jet2_semilepMVAreco_recoBDT_withH_cand_6jsplit | 0 |
jet3_semilepMVAreco_recoBDT_cand | 0 |
jet3_semilepMVAreco_recoBDT_cand_6jsplit | 0 |
jet3_semilepMVAreco_recoBDT_withH_cand | 0 |
jet3_semilepMVAreco_recoBDT_withH_cand_6jsplit | 0 |
jet4_jvt | 0.0 |
jet4_semilepMVAreco_recoBDT_cand | 0 |
jet4_semilepMVAreco_recoBDT_cand_6jsplit | 0 |
jet4_semilepMVAreco_recoBDT_withH_cand | 0 |
jet4_semilepMVAreco_recoBDT_withH_cand_6jsplit | 0 |
feature | minimum value in class 0 | minimum value in class 1 | maximum value in class 0 | maximum value in class 1 | mean value in class 0 | mean value in class 1 |
---|---|---|---|---|---|---|
Aplan_bjets | -7.49400541622e-16 | -9.99200722163e-16 | 0.306771606207 | 0.342340409756 | nan | nan |
Aplan_jets | -1.0 | -1.0 | 0.384027123451 | 0.396786928177 | -0.292178604066 | -0.105690957788 |
Centrality_all | 0.216128453612 | 0.239360421896 | 0.984351992607 | 0.980324208736 | 0.621264074856 | 0.638158164476 |
ClassifBDTOutput_6jsplit | -9.0 | -9.0 | 0.734750509262 | 0.877887845039 | -8.96257449902 | -8.01602255328 |
ClassifBDTOutput_basic | -9.0 | -9.0 | 0.758132338524 | 0.884025037289 | -8.64445531541 | -5.95642784706 |
ClassifBDTOutput_withReco_6jsplit | -9.0 | -9.0 | 0.502883553505 | 0.824222147465 | -8.96275092476 | -8.01666383235 |
ClassifBDTOutput_withReco_basic | -9.0 | -9.0 | 0.738292813301 | 0.901024758816 | -8.64556584485 | -5.96056388646 |
ClassifHPLUS_Semilep_HF_BDT200_Output | -9.0 | -9.0 | 0.677360117435 | 0.763524889946 | -3.11174767296 | -1.71056304817 |
dEtajj_MaxdEta | -1.0 | -1.0 | 4.9722328186 | 4.98975086212 | 1.29708928277 | 1.9236902294 |
dRbb_avg | 0.0 | 0.0 | 5.20613908768 | 4.99208021164 | 1.29767985497 | 1.58054554725 |
dRbb_MaxM | -1.0 | -1.0 | 5.21220350266 | 5.48122310638 | 1.02290731418 | 1.64366400836 |
dRbb_MaxPt | 0.0 | 0.0 | 5.20613908768 | 4.99208021164 | 1.24985918111 | 1.31763489994 |
dRbb_min | -9.0 | -9.0 | 5.18689107895 | 4.99208021164 | -3.38273138084 | -1.34053146174 |
dRbj_Wmass | 0.0 | 0.0 | 5.00446748734 | 4.13039302826 | 1.36783033094 | 1.36378178599 |
dRHl_MaxdR | -1.0 | -1.0 | 5.84383440018 | 8.75765514374 | 1.02870933225 | 1.60747307688 |
dRHl_MindR | -1.0 | -1.0 | 5.84383440018 | 8.75765514374 | 1.02870933225 | 1.60747307688 |
dRjj_min | 0.362186223269 | 0.37486192584 | 3.22884869576 | 2.72731518745 | 0.702402730876 | 0.627243045802 |
dRlepbb_MindR | 0.00490203499794 | 0.0211617406458 | 5.84383440018 | 8.75765514374 | 1.94797847806 | 2.02145818886 |
dRlj_MindR | -1.0 | -1.0 | 4.06677055359 | 3.64885306358 | 0.359403910135 | 0.64341203333 |
dRuu_MindR | 0.0 | 0.0 | 5.5062456131 | 5.48175954819 | 1.0313093197 | 0.896536698692 |
FirstLjetM | -0.011048543267 | -0.015625 | 503068.375 | 817635.5625 | 102360.424253 | 122507.555068 |
FirstLjetPt | 250003.3125 | 250009.53125 | 1848713.625 | 1630398.75 | 348174.887921 | 370642.333148 |
H1_all | 7.0661510108e-05 | 6.7265740654e-05 | 0.944513261318 | 0.945130288601 | 0.285435253726 | 0.268731691681 |
H4_all | -9.0 | -9.0 | 0.842754781246 | 0.828143060207 | -2.64479372308 | -1.19953542583 |
HhadT_nJets | 33974.75 | 34827.8984375 | 651883.0625 | 628216.9375 | 122960.739836 | 119759.104227 |
HhadT_nLjets | 135899.0 | 176410.65625 | 1867244.25 | 1795891.375 | 505162.491359 | 574636.273966 |
HiggsbbM | -9.0 | -9.0 | 1789595.875 | 1837845.625 | 116333.483631 | 123116.149257 |
HiggsjjM | 13038.6503906 | 11901.09375 | 1386965.625 | 705543.125 | 83175.0197188 | 77691.0902096 |
HT_all | -1.0 | -1.0 | 2998874.25 | 3813841.75 | 519635.867209 | 768793.366519 |
HT_jets | 135899.0 | 199256.5 | 3734488.5 | 3769301.75 | 646818.802921 | 797434.493496 |
HT_ljets | 250002.3125 | 250008.53125 | 3512577.0 | 3476071.5 | 468513.815387 | 540681.450511 |
Mbb_MaxM | 0.0 | 0.0 | 2051245.625 | 2839042.0 | 143919.056572 | 205512.599976 |
Mbb_MaxPt | 0.0 | 0.0 | 1789595.875 | 1837845.625 | 131623.76425 | 146598.099647 |
Mbb_MindR | 0.0 | 0.0 | 1789595.875 | 1837845.625 | 128040.159215 | 127675.380439 |
Mbj_MaxPt | 0.0 | 0.0 | 2419627.75 | 2772001.5 | 214920.927609 | 210647.631835 |
Mbj_MindR | 0.0 | 0.0 | 1098457.875 | 1901941.0 | 100846.759403 | 89691.6205125 |
Mbj_Wmass | 0.0 | 0.0 | 836553.3125 | 637244.6875 | 94151.4971774 | 87664.8211015 |
met_met | 348.632385254 | 843.942993164 | 1160893.125 | 1508619.125 | 106821.474231 | 109686.968154 |
met_phi | -3.1415617466 | -3.14124464989 | 3.14156293869 | 3.14151310921 | -0.0143933530364 | 4.91036199371e-05 |
MHiggs | -1.0 | -1.0 | 2051245.625 | 2839042.0 | 141702.094582 | 192775.171666 |
Mjj_HiggsMass | -1.0 | -1.0 | 438024.71875 | 338264.125 | 85094.3370441 | 105528.695968 |
Mjjj_MaxPt | 62055.4609375 | 63395.2148438 | 4116450.5 | 3500298.0 | 335086.618266 | 342816.680347 |
Mjj_MaxPt | 28786.671875 | 36976.21875 | 4084822.25 | 3298687.25 | 201428.101322 | 210318.903906 |
Mjj_MindR | 13038.6503906 | 11901.09375 | 723703.375 | 705543.125 | 82561.5963174 | 77360.232757 |
Mjj_MinM | -1.0 | -1.0 | 438024.71875 | 338264.125 | 42420.388276 | 44878.988362 |
mu | 0.490000009537 | 0.490000009537 | 39.5 | 39.5 | 21.0076103631 | 20.9236871621 |
Muu_MindR | 0.0 | 0.0 | 2369158.25 | 2304091.25 | 103974.314221 | 92473.307948 |
NBFricoNN_dil | -2.0 | -2.0 | 0.767973124981 | 0.860643327236 | -1.85900485978 | -1.17765695231 |
NBFricoNN_ljets | -2.0 | -2.0 | 0.652532041073 | 0.737013995647 | -1.92227280168 | -1.28610142853 |
nBjetOutsideLjet | 0 | 0 | 5 | 6 | 0.98264 | 1.44464 |
nBTags | 0 | 0 | 5 | 7 | 1.56036 | 2.21768 |
nBTags30 | 0 | 0 | 3 | 5 | 0.54768 | 0.77172 |
nBTags50 | 0 | 0 | 4 | 6 | 0.94012 | 1.33936 |
nBTags60 | 0 | 0 | 4 | 6 | 1.14916 | 1.64156 |
nBTags70 | 0 | 0 | 4 | 7 | 1.3756 | 1.95504 |
nBTags77 | 0 | 0 | 5 | 7 | 1.56036 | 2.21768 |
nBTags80 | 0 | 0 | 5 | 7 | 1.65784 | 2.35056 |
nBTags85 | 1 | 1 | 6 | 8 | 1.86796 | 2.62604 |
nBTags90 | 1 | 1 | 7 | 8 | 2.18832 | 3.04652 |
nBTagsFlatBEff_30 | 0 | 0 | 3 | 5 | 0.54472 | 0.77668 |
nBTagsFlatBEff_40 | 0 | 0 | 4 | 5 | 0.74572 | 1.06628 |
nBTagsFlatBEff_50 | 0 | 0 | 4 | 5 | 0.9298 | 1.33456 |
nBTagsFlatBEff_60 | 0 | 0 | 4 | 6 | 1.13152 | 1.6222 |
nBTagsFlatBEff_70 | 0 | 0 | 5 | 7 | 1.359 | 1.93748 |
nBTagsFlatBEff_77 | 0 | 0 | 5 | 8 | 1.54928 | 2.2038 |
nBTagsFlatBEff_85 | 0 | 0 | 6 | 8 | 1.87136 | 2.62892 |
nElectrons | 1 | 1 | 1 | 1 | 1.0 | 1.0 |
nHFJets | 0 | 0 | 7 | 10 | 2.30676 | 3.29404 |
NHiggs_30 | -1 | -1 | 7 | 9 | -0.16768 | 0.42108 |
nJetOutsideLjet | 0 | 0 | 11 | 13 | 3.39384 | 4.5126 |
Njet_pt40 | 1 | 1 | 10 | 14 | 4.22108 | 5.4158 |
Njet_pt40.1 | 1 | 1 | 10 | 14 | 4.22108 | 5.4158 |
nJets | 4 | 4 | 13 | 16 | 5.35484 | 6.7974 |
nLjet_m100 | 0 | 0 | 3 | 4 | 0.7122 | 0.94324 |
nLjet_m50 | 0 | 0 | 4 | 4 | 1.102 | 1.2938 |
nLjets | 1 | 1 | 4 | 5 | 1.3592 | 1.49524 |
nMuons | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
nPrimaryVtx | 2 | 2 | 38 | 38 | 15.85572 | 15.81796 |
pT_jet3 | -1.0 | -1.0 | 553179.625 | 655778.875 | 67092.7895566 | 100925.368944 |
pT_jet5 | -1.0 | -1.0 | 237711.734375 | 288864.4375 | 31992.8273128 | 57656.268518 |
pTuu_MindR | 0.0 | 0.0 | 1785396.25 | 1287976.125 | 220496.411567 | 217252.005205 |
SecondLjetM | -1.0 | -1.0 | 662050.5 | 557824.5 | 36700.0963306 | 50411.2378435 |
SecondLjetPt | -1.0 | -1.0 | 1663864.5 | 1338708.75 | 114137.696609 | 150199.621612 |
semilepMVAreco_b1higgsbhadtop_dR | -9.0 | -9.0 | 5.08403301239 | 5.31647586823 | -8.56438656943 | -5.39344733685 |
semilepMVAreco_bbhiggs_dR | -9.0 | -9.0 | 5.21220350266 | 5.40304374695 | -8.56049635007 | -5.42599335716 |
semilepMVAreco_BDT_output | -9.0 | -9.0 | 0.644310355186 | 0.707822859287 | -8.63353378944 | -5.92251666529 |
semilepMVAreco_BDT_output_6jsplit | -9.0 | -9.0 | 0.582318305969 | 0.660739064217 | -8.96109367886 | -8.00203296173 |
semilepMVAreco_BDT_output_truthMatchPattern | -9 | -9 | 60 | 63 | -1.977 | 7.92496 |
semilepMVAreco_BDT_withH_output | -9.0 | -9.0 | 0.729659199715 | 0.780469894409 | -8.63642664285 | -5.92580282234 |
semilepMVAreco_BDT_withH_output_6jsplit | -9.0 | -9.0 | 0.566623270512 | 0.744392037392 | -8.96178210871 | -8.00267216272 |
semilepMVAreco_BDT_withH_output_truthMatchPattern | -9 | -9 | 60 | 63 | -2.1314 | 7.57316 |
semilepMVAreco_hadWb1Higgs_mass | -9.0 | -9.0 | 4144045.0 | 3138752.5 | 17252.1125775 | 129582.668275 |
semilepMVAreco_higgsbhadtop_withH_dR | -9.0 | -9.0 | 5.4440369606 | 7.25095748901 | -8.56025376543 | -5.35999591952 |
semilepMVAreco_higgsbleptop_mass | -9.0 | -9.0 | 2203183.0 | 3036408.5 | 16939.5290481 | 122993.176201 |
semilepMVAreco_higgsbleptop_withH_dR | -9.0 | -9.0 | 5.82105350494 | 6.33459186554 | -8.55935880522 | -5.35411778042 |
semilepMVAreco_higgslep_dR | -9.0 | -9.0 | 4.99350690842 | 5.977602005 | -8.55341973933 | -5.30970366736 |
semilepMVAreco_higgsleptop_dR | -9.0 | -9.0 | 6.16924333572 | 6.22700834274 | -8.55147746722 | -5.3079290331 |
semilepMVAreco_higgs_mass | -9.0 | -9.0 | 2051245.625 | 1930520.875 | 8641.03434141 | 59405.6110681 |
semilepMVAreco_higgsq1hadW_mass | -9.0 | -9.0 | 4116450.5 | 3428256.75 | 18882.1229678 | 137939.1892 |
semilepMVAreco_higgsttbar_withH_dR | -9.0 | -9.0 | 7.26006364822 | 7.93532514572 | -8.52146270812 | -5.08767969191 |
semilepMVAreco_leptophadtop_dR | -9.0 | -9.0 | 5.71730709076 | 6.80416870117 | -8.53053641217 | -5.08514481921 |
semilepMVAreco_leptophadtop_withH_dR | -9.0 | -9.0 | 6.46707963943 | 6.84458255768 | -8.52820537901 | -5.10033581936 |
semilepMVAreco_Ncombinations | -9 | -9 | 6048 | 20592 | 3.75084 | 138.16428 |
semilepMVAreco_nuApprox_recoBDT | -9 | -9 | 2 | 2 | -8.60832 | -5.72272 |
semilepMVAreco_nuApprox_recoBDT_6jsplit | -9 | -9 | 2 | 2 | -8.95816 | -7.93368 |
semilepMVAreco_nuApprox_recoBDT_withH | -9 | -9 | 2 | 2 | -8.60816 | -5.72496 |
semilepMVAreco_nuApprox_recoBDT_withH_6jsplit | -9 | -9 | 2 | 2 | -8.95808 | -7.93432 |
semilepMVAreco_ttH_Ht_withH | -9.0 | -9.0 | 2493129.75 | 3769301.5 | 26544.5393081 | 226087.482491 |
ttHF_mva_discriminant | -3.0 | -3.0 | 0.741369903088 | 0.701689481735 | -2.33535657537 | -2.59714973919 |
el_d0sig[0] | -4.98371934891 | -4.99793577194 | 4.99155759811 | 4.97297143936 | 0.0262800611998 | 0.0276848047255 |
el_delta_z0_sintheta[0] | -0.355400532484 | -0.377378970385 | 0.496148884296 | 0.382513254881 | 0.00091882641401 | 0.000558113879654 |
el_e[0] | 27104.828125 | 27041.4433594 | 1436408.75 | 1929214.875 | 168441.738758 | 159692.57822 |
el_eta[0] | -2.4695084095 | -2.47413420677 | 2.48645853996 | 2.47491693497 | 0.00107813608341 | -0.0116445933656 |
el_phi[0] | -3.14139413834 | -3.14100241661 | 3.14118862152 | 3.1412460804 | 0.0022245401696 | 0.00335566958487 |
el_pt[0] | 27009.0683594 | 27002.4707031 | 837754.375 | 757782.0 | 107405.500895 | 103710.400585 |
el_topoetcone20[0] | -3932.19042969 | -7888.66113281 | 20991.453125 | 24406.2089844 | 456.835925678 | 529.43416707 |
jet_e[0] | 46213.4414062 | 52848.6953125 | 4174259.5 | 3456189.25 | 464535.950489 | 449840.888808 |
jet_eta[0] | -2.49848604202 | -2.49869942665 | 2.49944186211 | 2.49330043793 | 0.00534315453471 | -0.00362142494498 |
jet_jvt[0] | 0.0 | -0.10000000149 | 1.0 | 1.0 | 0.989750534531 | 0.987864212861 |
jet_mv2c10[0] | -0.999308645725 | -0.995598912239 | 0.999999701977 | 0.999999761581 | -0.122742881073 | -0.0354206077227 |
jet_mv2c20[0] | -0.998623251915 | -0.994459807873 | 0.999999761581 | 0.999999821186 | -0.151402284149 | -0.0680963563942 |
jet_phi[0] | -3.14089894295 | -3.1412255764 | 3.14156699181 | 3.14155507088 | -0.008290770526 | 0.00334214615222 |
jet_pt[0] | 42059.4296875 | 45892.9414062 | 1739462.0 | 1495945.75 | 275369.037988 | 283374.713063 |
jet_semilepMVAreco_recoBDT_cand[0] | 0 | 0 | 7 | 7 | 0.11516 | 0.89936 |
jet_semilepMVAreco_recoBDT_cand_6jsplit[0] | 0 | 0 | 5 | 5 | 0.01244 | 0.2792 |
jet_semilepMVAreco_recoBDT_withH_cand[0] | 0 | 0 | 7 | 7 | 0.11888 | 0.8966 |
jet_semilepMVAreco_recoBDT_withH_cand_6jsplit[0] | 0 | 0 | 5 | 5 | 0.01332 | 0.27356 |
jet_e[1] | 30531.8222656 | 34797.3242188 | 2993766.5 | 2534631.0 | 269864.614551 | 288450.834968 |
jet_eta[1] | -2.49766683578 | -2.49709820747 | 2.4989387989 | 2.49962115288 | 0.0135295786519 | 1.0545521983e-05 |
jet_jvt[1] | 0.0 | -0.10000000149 | 1.0 | 1.0 | 0.985452911446 | 0.985331679575 |
jet_mv2c10[1] | -0.99775582552 | -0.997706770897 | 0.999999701977 | 0.999999701977 | 0.0574935307476 | 0.0576477699607 |
jet_mv2c20[1] | -0.998131096363 | -0.996162235737 | 0.999999940395 | 0.999999940395 | 0.0237957392307 | 0.023740222446 |
jet_phi[1] | -3.1409778595 | -3.14135956764 | 3.14142346382 | 3.14152979851 | -0.0189235605479 | -0.0108111497169 |
jet_pt[1] | 26202.2050781 | 30877.859375 | 1477095.0 | 1329214.125 | 157387.536059 | 176812.027551 |
jet_semilepMVAreco_recoBDT_cand[1] | 0 | 0 | 7 | 7 | 0.1154 | 0.92172 |
jet_semilepMVAreco_recoBDT_cand_6jsplit[1] | 0 | 0 | 6 | 6 | 0.01188 | 0.28308 |
jet_semilepMVAreco_recoBDT_withH_cand[1] | 0 | 0 | 7 | 7 | 0.10728 | 0.86844 |
jet_semilepMVAreco_recoBDT_withH_cand_6jsplit[1] | 0 | 0 | 6 | 6 | 0.01084 | 0.27648 |
jet_e[2] | 25806.6972656 | 27428.9472656 | 1507045.25 | 1818133.625 | 171901.415283 | 198140.886519 |
jet_eta[2] | -2.4985897541 | -2.49969387054 | 2.49999523163 | 2.49950170517 | 0.0049847052997 | 0.00271204581293 |
jet_jvt[2] | -0.10000000149 | -0.10000000149 | 1.0 | 1.0 | 0.978870672863 | 0.980619707801 |
jet_mv2c10[2] | -0.997559428215 | -0.998916089535 | 0.999999582767 | 0.999999642372 | -0.112490939107 | 0.00172402071433 |
jet_mv2c20[2] | -0.997589230537 | -0.998842120171 | 0.999999940395 | 0.999999940395 | -0.140844755664 | -0.0298748113483 |
jet_phi[2] | -3.1415116787 | -3.14148664474 | 3.14144229889 | 3.14155912399 | 0.00747743471755 | 0.0188983024984 |
jet_pt[2] | 25267.2773438 | 25528.3945312 | 553179.625 | 655778.875 | 95848.9559049 | 117844.390068 |
jet_semilepMVAreco_recoBDT_cand[2] | 0 | 0 | 7 | 7 | 0.1164 | 0.94568 |
jet_semilepMVAreco_recoBDT_cand_6jsplit[2] | 0 | 0 | 6 | 6 | 0.01224 | 0.30004 |
jet_semilepMVAreco_recoBDT_withH_cand[2] | 0 | 0 | 7 | 7 | 0.1122 | 0.91572 |
jet_semilepMVAreco_recoBDT_withH_cand_6jsplit[2] | 0 | 0 | 6 | 6 | 0.01152 | 0.28696 |
jet_e[3] | 25334.3671875 | 25666.6425781 | 1071788.875 | 1521262.0 | 115948.896984 | 145527.215009 |
jet_eta[3] | -2.4997625351 | -2.4999859333 | 2.49969911575 | 2.49848270416 | -0.00272975809201 | 0.00816208132214 |
jet_jvt[3] | -0.10000000149 | -0.10000000149 | 1.0 | 1.0 | 0.972673268423 | 0.977125209702 |
jet_mv2c10[3] | -0.994961559772 | -0.998191177845 | 0.999999344349 | 0.999999642372 | -0.240904994637 | -0.0753386941482 |
jet_mv2c20[3] | -0.99590241909 | -0.996704161167 | 0.999999880791 | 0.999999940395 | -0.264075163277 | -0.105536294683 |
jet_phi[3] | -3.14130163193 | -3.14151310921 | 3.14131808281 | 3.14156985283 | 0.0154864133365 | 0.00205520206863 |
jet_pt[3] | 25000.109375 | 25005.5957031 | 399645.375 | 495660.59375 | 61351.4743601 | 83082.4428178 |
jet_semilepMVAreco_recoBDT_cand[3] | 0 | 0 | 7 | 7 | 0.1204 | 0.94172 |
jet_semilepMVAreco_recoBDT_cand_6jsplit[3] | 0 | 0 | 6 | 6 | 0.01276 | 0.29644 |
jet_semilepMVAreco_recoBDT_withH_cand[3] | 0 | 0 | 7 | 7 | 0.1194 | 0.93416 |
jet_semilepMVAreco_recoBDT_withH_cand_6jsplit[3] | 0 | 0 | 6 | 6 | 0.01276 | 0.29316 |
ljet_C2[0] | -999.0 | -999.0 | 0.501163780689 | 0.53831577301 | -4.82591801987 | -2.95712873784 |
ljet_D2[0] | -999.0 | -999.0 | 163.785522461 | 246.203109741 | -3.4621610587 | -1.72295446199 |
ljet_e[0] | 251338.25 | 255339.578125 | 3401028.5 | 2810728.25 | 560908.394012 | 570946.115023 |
ljet_eta[0] | -1.99933993816 | -1.99875664711 | 1.99949324131 | 1.99960911274 | -0.0018661135992 | 0.000889097331575 |
ljet_m[0] | -0.011048543267 | -0.015625 | 503068.375 | 817635.5625 | 102360.424253 | 122507.555068 |
ljet_phi[0] | -3.14147734642 | -3.14110732079 | 3.1414732933 | 3.14124441147 | -0.00497340873295 | 0.00324811896069 |
ljet_pt[0] | 250003.3125 | 250009.53125 | 1848713.625 | 1630398.75 | 348174.887921 | 370642.333148 |
ljet_sd12[0] | 0.0 | 0.0 | 356866.40625 | 561714.625 | 55438.2231004 | 65852.9702488 |
ljet_sd23[0] | 0.0 | 0.0 | 159762.46875 | 170242.5 | 14938.9851776 | 20279.6674246 |
ljet_tau21[0] | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
ljet_tau21_wta[0] | -999.0 | -999.0 | 0.875818729401 | 0.884462535381 | -4.6324538598 | -2.77555435605 |
ljet_tau32[0] | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
ljet_tau32_wta[0] | -999.0 | -999.0 | 0.922931492329 | 0.914339840412 | -9.91527871751 | -7.28486958923 |
class | 0 | 1 | 0 | 1 | 0.0 | 1.0 |
# Rebuild the raw and preprocessed CSV files from the two ROOT inputs and
# produce plots for each of them.
filename_data_raw="data.csv"
filename_data_preprocessed="data_preprocessed.csv"
filename_ttH="ttH_group.phys-higgs.10205167._000002.out.root"
filename_ttbb="ttbb_group.phys-higgs.10205185._000001.out.root"
number_events="25000"

# Remove results of any earlier run. -f keeps a first run (no files yet)
# from printing errors; -- protects against names starting with a dash.
rm -f -- "${filename_data_raw}" "${filename_data_preprocessed}"

#./ttHbb_examine_ROOT_file.py --fileroot="${filename_ttH}"
#./ttHbb_examine_ROOT_file.py --fileroot="${filename_ttbb}"

# Convert both samples into the same CSV file: class label 1 for ttH,
# class label 0 for ttbb.
# NOTE(review): the second call presumably appends to the file written by
# the first -- confirm against ttHbb_ROOT_file_to_CSV_file.py.
./ttHbb_ROOT_file_to_CSV_file.py \
    --fileroot="${filename_ttH}" \
    --classlabel=1 \
    --filecsv="${filename_data_raw}" \
    --maxevents="${number_events}" \
    --headings=true
./ttHbb_ROOT_file_to_CSV_file.py \
    --fileroot="${filename_ttbb}" \
    --classlabel=0 \
    --filecsv="${filename_data_raw}" \
    --maxevents="${number_events}" \
    --headings=true

# Derive the preprocessed CSV from the raw CSV.
./ttHbb_preprocess_CSV_file.py \
    --infile="${filename_data_raw}" \
    --outfile="${filename_data_preprocessed}"

# Plotting pass over the raw data: every plot type is switched off here.
./ttHbb_plots_of_CSV.py \
    --infile="${filename_data_raw}" \
    --histogramcomparisons=false \
    --scattermatrix=false \
    --eventimages=false \
    --directoryplots=plots_raw

# Plotting pass over the preprocessed data: only event images are enabled.
./ttHbb_plots_of_CSV.py \
    --infile="${filename_data_preprocessed}" \
    --histogramcomparisons=false \
    --scattermatrix=false \
    --eventimages=true \
    --directoryplots=plots_preprocessed
In [2]:
import missingno
import pandas as pd

# Load data.csv into a DataFrame.
data = pd.read_csv(
    "data.csv",
    delimiter = ","
)
# first 100 rows of first 40 columns
# (the slice end is exclusive, so 0:40 selects columns 0 through 39)
missingno.matrix(data[data.columns[0:40]][0:100], color = (0, 0, 0))
# first 100 rows
missingno.matrix(data[0:100], color = (0, 0, 0))
# first 100 rows, sorted by completeness
data_sorted = missingno.nullity_sort(data, sort = "descending")
missingno.matrix(data_sorted[0:100], color = (0, 0, 0))
In [3]:
import missingno
import pandas as pd

# Load data.csv into a DataFrame.
data = pd.read_csv(
    "data.csv",
    delimiter = ","
)
# bar chart, first 78 columns
# (the slice end is exclusive, so 0:78 selects columns 0 through 77 and
# lines up with the second chart, which starts at column 78)
missingno.bar(data[data.columns[0:78]], color = (0, 0, 0))
# bar chart, next 78 columns
missingno.bar(data[data.columns[78:156]], color = (0, 0, 0))
The nullity correlation heatmap shows how strongly the presence of one variable positively or negatively affects the presence of another variable.
nullity correlation | description |
---|---|
-1 | If one variable appears, the other certainly does not appear. |
0 | The presence of one variable has no apparent effect on the presence of the other. |
1 | If one variable appears, the other certainly does appear. |
In [98]:
import missingno
import pandas as pd
import shijian

# Load data.csv into a DataFrame.
data = pd.read_csv("data.csv", delimiter = ",")

# Keep only the variables that have at least one missing value.
has_missing = data.isnull().any(axis = 0)
data = data.loc[:, has_missing]

# Split the remaining variables into consecutive index ranges so that each
# heatmap shows a limited number of variables.
edge_pairs = shijian.ranges_edge_pairs(
    extent       = len(data.columns), # number of variables
    range_length = 28                 # maximum number of variables per plot
)

# Draw one nullity correlation heatmap per range of variables.
for edge_pair in edge_pairs:
    columns_chunk = data.columns[edge_pair[0]:edge_pair[1]]
    missingno.heatmap(data[columns_chunk])
The nullity dendrogram visualizes variable completeness, revealing trends deeper than the pairwise ones visible in the correlation heatmap. The dendrogram uses a hierarchical clustering algorithm to bin variables against one another by their nullity correlation (which is measured in terms of binary distance). At each bifurcation of the tree, the variables are split based on which combination minimizes the distance of the remaining clusters. The more monotone the set of variables, the closer their total distance is to zero, and the closer their average distance is to zero.
To interpret the dendrogram, read it hierarchically top-down. Cluster leaves which are linked together at a distance of zero fully predict one another's presence -- one variable might always be empty when another is filled, or they might both be filled or both empty, and so on. Cluster leaves which split close to zero, but not at zero, predict one another very well, but imperfectly.
In [104]:
import missingno
import pandas as pd
import shijian

# Load data.csv into a DataFrame.
data = pd.read_csv("data.csv", delimiter = ",")

# Keep only the variables that have at least one missing value.
has_missing = data.isnull().any(axis = 0)
data = data.loc[:, has_missing]

# Split the remaining variables into consecutive index ranges so that each
# dendrogram shows a limited number of variables.
edge_pairs = shijian.ranges_edge_pairs(
    extent       = len(data.columns), # number of variables
    range_length = 30                 # maximum number of variables per plot
)

# Draw one nullity dendrogram per range of variables.
for edge_pair in edge_pairs:
    columns_chunk = data.columns[edge_pair[0]:edge_pair[1]]
    missingno.dendrogram(data[columns_chunk])
${t\bar{t}b\bar{b}}$ | ${t\bar{t}H}$ |
---|---|
In [6]:
import missingno
import pandas as pd
import shijian

# Load data.csv into a DataFrame.
data = pd.read_csv("data.csv", delimiter = ",")

# Order the rows by completeness (descending), then keep the first 500 rows.
data = missingno.nullity_sort(data, sort = "descending")
data = data[:500]

# Split the variables into consecutive index ranges so that each matrix plot
# shows a limited number of variables.
edge_pairs = shijian.ranges_edge_pairs(
    extent       = len(data.columns), # number of variables
    range_length = 40                 # maximum number of variables per plot
)

# Draw one nullity matrix per range of variables.
for edge_pair in edge_pairs:
    columns_chunk = data.columns[edge_pair[0]:edge_pair[1]]
    missingno.matrix(data[columns_chunk], color = (0, 0, 0))
So, an obvious event selection is, as expected, one electron or one muon.
In [12]:
import missingno
import pandas as pd
import shijian

# Load data.csv into a DataFrame.
data = pd.read_csv("data.csv", delimiter = ",")

# Order the rows by completeness (descending) and plot the nullity matrix of
# the whole table.
data = missingno.nullity_sort(data, sort = "descending")
print("full data, sorted by completeness:")
missingno.matrix(data, color = (0, 0, 0))
In [13]:
# Split the variables of the existing `data` table into consecutive index
# ranges so that each matrix plot shows a limited number of variables.
edge_pairs = shijian.ranges_edge_pairs(
    extent       = len(data.columns), # number of variables
    range_length = 40                 # maximum number of variables per plot
)

# Draw one nullity matrix per range of variables.
for edge_pair in edge_pairs:
    columns_chunk = data.columns[edge_pair[0]:edge_pair[1]]
    missingno.matrix(data[columns_chunk], color = (0, 0, 0))
In [2]:
import missingno
import pandas as pd
import shijian

# Load data.csv into a DataFrame.
data = pd.read_csv("data.csv", delimiter = ",")

# Order the rows by completeness (descending), then keep the first 500 rows.
data = missingno.nullity_sort(data, sort = "descending")
data = data[:500]

#missingno.matrix(data, color = (0, 0, 0))
# Plot the nullity matrix for the columns at positions 100 through 149 only.
columns_subset = data.columns[100:150]
missingno.matrix(data[columns_subset], color = (0, 0, 0))
In [2]:
import missingno
import pandas as pd
import shijian

# Load data.csv into a DataFrame.
data = pd.read_csv("data.csv", delimiter = ",")

# Order the rows by completeness (descending), then keep the first 500 rows.
data = missingno.nullity_sort(data, sort = "descending")
data = data[:500]

#missingno.matrix(data, color = (0, 0, 0))
# Plot the nullity matrix for the columns at positions 100 through 149 only.
columns_subset = data.columns[100:150]
missingno.matrix(data[columns_subset], color = (0, 0, 0))
# Rebuild data.csv and data_preprocessed.csv from the two ROOT inputs with
# the "ejets_5JE4BI" selection applied, then plot both CSV files.

# Remove results of any earlier run. -f keeps a first run (no files yet)
# from printing errors; -- protects against names starting with a dash.
rm -f -- data.csv data_preprocessed.csv

filename_ttH="ttH_group.phys-higgs.10205167._000002.out.root"
filename_ttbb="ttbb_group.phys-higgs.10205185._000001.out.root"
#number_events="25000"
#./ttHbb_examine_ROOT_file.py --fileroot="${filename_ttH}"
#./ttHbb_examine_ROOT_file.py --fileroot="${filename_ttbb}"

# Convert both samples into the same CSV file: class label 1 for ttH,
# class label 0 for ttbb. No --maxevents option is passed in this cell.
# NOTE(review): the second call presumably appends to the file written by
# the first -- confirm against ttHbb_ROOT_file_to_CSV_file.py.
./ttHbb_ROOT_file_to_CSV_file.py \
    --fileroot="${filename_ttH}" \
    --selection="ejets_5JE4BI" \
    --classlabel=1 \
    --filecsv=data.csv \
    --headings=true
./ttHbb_ROOT_file_to_CSV_file.py \
    --fileroot="${filename_ttbb}" \
    --selection="ejets_5JE4BI" \
    --classlabel=0 \
    --filecsv=data.csv \
    --headings=true

# Derive the preprocessed CSV from the raw CSV. The trailing backslash after
# the script name is required: without it the options on the following lines
# are run as a separate command and the script receives no arguments.
./ttHbb_preprocess_CSV_file.py \
    --infile=data.csv \
    --outfile=data_preprocessed.csv

# Histogram comparisons for the raw data.
./ttHbb_plots_of_CSV.py \
    --infile=data.csv \
    --histogramcomparisons=true \
    --scattermatrix=false \
    --directoryplots=plots_raw

# Histogram comparisons for the preprocessed data.
./ttHbb_plots_of_CSV.py \
    --infile=data_preprocessed.csv \
    --histogramcomparisons=true \
    --scattermatrix=false \
    --directoryplots=plots_preprocessed
# Rebuild data.csv and data_preprocessed.csv from the June ROOT inputs with
# the "ejets_5JE4BI" selection applied, then plot both CSV files.

# Remove results of any earlier run. -f keeps a first run (no files yet)
# from printing errors; -- protects against names starting with a dash.
rm -f -- data.csv data_preprocessed.csv

#filename_ttH="ttH_group.phys-higgs.10205167._000002.out.root" # January
#filename_ttbb="ttbb_group.phys-higgs.10205185._000001.out.root" # January
filename_ttH="ttH_group.phys-higgs.11468583._000005.out.root" # June
filename_ttbb="ttbb_group.phys-higgs.11468624._000005.out.root" # June
#number_events="25000"

# Examine both input ROOT files before converting them.
./ttHbb_examine_ROOT_file.py --fileroot="${filename_ttH}"
./ttHbb_examine_ROOT_file.py --fileroot="${filename_ttbb}"

# Convert both samples into the same CSV file: class label 1 for ttH,
# class label 0 for ttbb. No --maxevents option is passed in this cell.
# NOTE(review): the second call presumably appends to the file written by
# the first -- confirm against ttHbb_ROOT_file_to_CSV_file.py.
./ttHbb_ROOT_file_to_CSV_file.py \
    --fileroot="${filename_ttH}" \
    --selection="ejets_5JE4BI" \
    --classlabel=1 \
    --filecsv=data.csv \
    --headings=true
./ttHbb_ROOT_file_to_CSV_file.py \
    --fileroot="${filename_ttbb}" \
    --selection="ejets_5JE4BI" \
    --classlabel=0 \
    --filecsv=data.csv \
    --headings=true

# Derive the preprocessed CSV from the raw CSV. The trailing backslash after
# the script name is required: without it the options on the following lines
# are run as a separate command and the script receives no arguments.
./ttHbb_preprocess_CSV_file.py \
    --infile=data.csv \
    --outfile=data_preprocessed.csv

# Histogram comparisons for the raw data.
./ttHbb_plots_of_CSV.py \
    --infile=data.csv \
    --histogramcomparisons=true \
    --scattermatrix=false \
    --directoryplots=plots_raw

# Histogram comparisons for the preprocessed data.
./ttHbb_plots_of_CSV.py \
    --infile=data_preprocessed.csv \
    --histogramcomparisons=true \
    --scattermatrix=false \
    --directoryplots=plots_preprocessed
general kinematic variables:
variable | n-tuple name | description | >= 6j | 5j |
---|---|---|---|---|
${\Delta R^{\text{avg}}_{bb}}$ | dRbb_avg_Sort4 |
average ${\Delta R}$ for all ${b}$-tagged jet pairs | yes | yes |
${\Delta R^{\text{max} p_{T}}_{bb}}$ | dRbb_MaxPt_Sort4 |
${\Delta R}$ between the two ${b}$-tagged jets with the largest vector sum ${p_{T}}$ | yes | - |
${\Delta \eta^{\textrm{max}\Delta\eta}_{jj}}$ | dEtajj_MaxdEta |
maximum ${\Delta\eta}$ between any two jets | yes | yes |
${m^{\text{min} \Delta R}_{bb}}$ | Mbb_MindR_Sort4 |
mass of the combination of the two ${b}$-tagged jets with the smallest ${\Delta R}$ | yes | - |
${m^{\text{min} \Delta R}_{jj}}$ | Mjj_MindR |
mass of the combination of any two jets with the smallest ${\Delta R}$ | - | yes |
${p^{\text{jet} 5}_{T}}$ | pT_jet5 |
${p_{T}}$ of the fifth leading jet | yes | yes |
${N^{\text{Higgs}}_{30}}$ | nHiggsbb30_Sort4 |
number of ${b}$-jet pairs with invariant mass within 30 GeV of the Higgs boson mass | yes | yes |
${H^{\text{had}}_{T}}$ | HT_jets ? |
scalar sum of jet ${p_{T}}$ | - | yes |
${\Delta R^{\text{min}\Delta R}_{\text{lep}-bb}}$ | dRlepbb_MindR_Sort4 |
${\Delta R}$ between the lepton and the combination of the two ${b}$-tagged jets with the smallest ${\Delta R}$ | - | yes |
aplanarity | Aplanarity_jets |
${1.5\lambda_{2}}$, where ${\lambda_{2}}$ is the second eigenvalue of the momentum tensor built with all jets | yes | yes |
centrality | Centrality_all |
the scalar sum of the ${p_{T}}$ divided by the sum of the ${E}$ for all jets and the lepton | yes | yes |
${H1}$ | H1_all |
second Fox-Wolfram moment computed using all jets and the lepton | yes | yes |
variables from reconstruction BDT output:
variable | note name | n-tuple name | description | >=6j | 5j |
---|---|---|---|---|---|
BDT | TTHReco_best_TTHReco |
BDT output | yes* | yes* | |
${m_{H}}$ | TTHReco_T85_best_Higgs_mass |
TTHReco_best_Higgs_mass |
Higgs boson mass | yes | yes |
${m_{H,b_{\text{lep top}}}}$ | TTHReco_T85_best_Higgsleptop_mass |
TTHReco_best_Higgsbleptop_mass |
mass of Higgs boson and ${b}$-jet from leptonic top | yes | - |
${\Delta R_{\text{Higgs }bb}}$ | TTHReco_T85_best_bbHiggs_dR |
TTHReco_best_bbHiggs_dR |
${\Delta R}$ between ${b}$-jets from the Higgs boson | yes | yes |
${\Delta R_{H,t\bar{t}}}$ | TTHReco_T85_withH_best_Higgsttbar_dR |
TTHReco_withH_best_Higgsttbar_dR |
${\Delta R}$ between Higgs boson and ${t\bar{t}}$ system | yes* | yes* |
${\Delta R_{H,\text{lep top}}}$ | TTHReco_T85_best_Higgsleptop_dR |
TTHReco_best_Higgsleptop_dR |
${\Delta R}$ between Higgs boson and leptonic ${t}$ | yes | - |
${\Delta R_{H,b_{\text{had top}}}}$ | TTHReco_T85_withH_best_TTHReco_T85_withH ? |
TTHReco_best_b1Higgsbhadtop_dR |
${\Delta R}$ between Higgs boson and ${b}$-jet from hadronic ${t}$ | - | yes* |
For variables from the reconstruction BDT, those with an asterisk are from the BDT using Higgs boson information while those with no asterisk are from the BDT without Higgs boson information.
variables from likelihood calculation:
variable | n-tuple name | description | >=6j | 5j |
---|---|---|---|---|
D | LHD_Discriminant |
likelihood discriminant | yes | yes |
variables from ${b}$-tagging:
variable | note name | n-tuple name | description | >=6j | 5j |
---|---|---|---|---|---|
${w^{H}_{b}}$ | TTHReco_T85_best_bbHiggs_tagWeightBin_sum |
? | sum of binned ${b}$-tagging weights of jets | yes | yes |
${B_{j^{3}}}$ | jet_mv2_order_3_tagWeightBin |
? | third jet binned ${b}$-tagging weight (sorted by weight) | yes | yes |
${B_{j^{4}}}$ | jet_mv2_order_4_tagWeightBin |
? | fourth jet binned ${b}$-tagging weight (sorted by weight) | yes | yes |
${B_{j^{5}}}$ | jet_mv2_order_5_tagWeightBin |
? | fifth jet binned ${b}$-tagging weight (sorted by weight) | yes | yes |
variable | type | note name | n-tuple name | description | >= 6j | 5j |
---|---|---|---|---|---|---|
${\Delta R^{\text{avg}}_{bb}}$ | general kinematic | dRbb_avg_Sort4 |
average ${\Delta R}$ for all ${b}$-tagged jet pairs | yes | yes | |
${\Delta R^{\text{max} p_{T}}_{bb}}$ | general kinematic | dRbb_MaxPt_Sort4 |
${\Delta R}$ between the two ${b}$-tagged jets with the largest vector sum ${p_{T}}$ | yes | - | |
${\Delta \eta^{\textrm{max}\Delta\eta}_{jj}}$ | general kinematic | dEtajj_MaxdEta |
maximum ${\Delta\eta}$ between any two jets | yes | yes | |
${m^{\text{min} \Delta R}_{bb}}$ | general kinematic | Mbb_MindR_Sort4 |
mass of the combination of the two ${b}$-tagged jets with the smallest ${\Delta R}$ | yes | - | |
${m^{\text{min} \Delta R}_{jj}}$ | general kinematic | Mjj_MindR |
mass of the combination of any two jets with the smallest ${\Delta R}$ | - | yes | |
${N^{\text{Higgs}}_{30}}$ | general kinematic | nHiggsbb30_Sort4 |
number of ${b}$-jet pairs with invariant mass within 30 GeV of the Higgs boson mass | yes | yes | |
${H^{\text{had}}_{T}}$ | general kinematic | HT_jets ? |
scalar sum of jet ${p_{T}}$ | - | yes | |
${\Delta R^{\text{min}\Delta R}_{\text{lep}-bb}}$ | general kinematic | dRlepbb_MindR_Sort4 |
${\Delta R}$ between the lepton and the combination of the two ${b}$-tagged jets with the smallest ${\Delta R}$ | - | yes | |
aplanarity | general kinematic | Aplanarity_jets |
${1.5\lambda_{2}}$, where ${\lambda_{2}}$ is the second eigenvalue of the momentum tensor built with all jets | yes | yes | |
${H1}$ | general kinematic | H1_all |
second Fox-Wolfram moment computed using all jets and the lepton | yes | yes | |
BDT | reconstruction BDT output | TTHReco_best_TTHReco |
BDT output | yes* | yes* | |
${m_{H}}$ | reconstruction BDT output | TTHReco_T85_best_Higgs_mass |
TTHReco_best_Higgs_mass |
Higgs boson mass | yes | yes |
${m_{H,b_{\text{lep top}}}}$ | reconstruction BDT output | TTHReco_T85_best_Higgsleptop_mass |
TTHReco_best_Higgsbleptop_mass |
mass of Higgs boson and ${b}$-jet from leptonic top | yes | - |
${\Delta R_{\text{Higgs }bb}}$ | reconstruction BDT output | TTHReco_T85_best_bbHiggs_dR |
TTHReco_best_bbHiggs_dR |
${\Delta R}$ between ${b}$-jets from the Higgs boson | yes | yes |
${\Delta R_{H,t\bar{t}}}$ | reconstruction BDT output | TTHReco_T85_withH_best_Higgsttbar_dR |
TTHReco_withH_best_Higgsttbar_dR |
${\Delta R}$ between Higgs boson and ${t\bar{t}}$ system | yes* | yes* |
${\Delta R_{H,\text{lep top}}}$ | reconstruction BDT output | TTHReco_T85_best_Higgsleptop_dR |
TTHReco_best_Higgsleptop_dR |
${\Delta R}$ between Higgs boson and leptonic ${t}$ | yes | - |
${\Delta R_{H,b_{\text{had top}}}}$ | reconstruction BDT output | TTHReco_T85_withH_best_TTHReco_T85_withH ? |
TTHReco_best_b1Higgsbhadtop_dR |
${\Delta R}$ between Higgs boson and ${b}$-jet from hadronic ${t}$ | - | yes* |
D | likelihood calculation | LHD_Discriminant |
likelihood discriminant | yes | yes | |
${\text{MEM}_{D1}}$ | matrix method | matrix method | yes | no | ||
${w^{H}_{b}}$ | ${b}$-tagging | TTHReco_T85_best_bbHiggs_tagWeightBin_sum |
? | sum of binned ${b}$-tagging weights of jets | yes | yes |
${B_{j^{3}}}$ | ${b}$-tagging | jet_mv2_order_3_tagWeightBin |
? | third jet binned ${b}$-tagging weight (sorted by weight) | yes | yes |
${B_{j^{4}}}$ | ${b}$-tagging | jet_mv2_order_4_tagWeightBin |
? | fourth jet binned ${b}$-tagging weight (sorted by weight) | yes | yes |
${B_{j^{5}}}$ | ${b}$-tagging | jet_mv2_order_5_tagWeightBin |
? | fifth jet binned ${b}$-tagging weight (sorted by weight) | yes | yes |
In [ ]: