In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import h5py
In [2]:
import sys
sys.path.insert(0, "../python")
import InputsProducer
import ParametrizedModel as pm
In [3]:
from InputsProducer import SampleType as st
from matplotlib.backends.backend_pdf import PdfPages
In [4]:
import statsmodels.stats.proportion
In [5]:
# Wildcard pattern of the input ROOT files; it is passed to InputsProducer.CreateRootDF in the next cell.
# NOTE(review): this is an absolute, machine-specific path — consider a configurable data directory.
# Alternative patterns used previously, kept for reference:
# file = '/data/dido/new_samples/NN_samples/GluGluToRadionToHHTo2B2Tau_M-*_*Tau_2016_*.root'
# file = '/data/dido/samples_23_10/GluGluToHHTo2B2Tau_node_SM_tauTau_2018_ggHH_NonRes.root'
# file = '/data/dido/samples_23_10/GluGluToHHTo2B2Tau_node_*2016*.root'
file = '/data/dido/samples_23_10/*.root'
In [7]:
%%time
# Read the ROOT samples matched by `file`.
# NOTE(review): the meaning of the positional arguments (0, True, True) is defined in
# InputsProducer.CreateRootDF — confirm there.
data = InputsProducer.CreateRootDF(file, 0, True, True)
# Build the arrays used by the rest of the notebook:
#   X – inputs given to model.predict (also indexed as X[:, :, discr] in acc_prod),
#   Y – indexed as Y[:, :, 0] and used as the truth labels in acc_prod,
#   Z – per-event values used in acc_prod to select sample type, spin, point, year and channel,
#   var_pos – passed to pm.HHModel; var_pos_z and var_name are not used in the cells shown here.
X, Y, Z, var_pos, var_pos_z, var_name = InputsProducer.CreateXY(data, '../config/training_variables.json' )
In [9]:
# np.save('X_par0_v2', X)
# np.count_nonzero(X - X_copy)
In [8]:
# Hyper-parameters passed to pm.HHModel when the model is instantiated.
params = {
    # dense layers ("pre" block)
    "num_den_layers_pre": 0,
    "num_units_den_layers_pre": 1,
    "activation_dense_pre": "sigmoid",
    "dropout_rate_den_layers_pre": 0,
    # recurrent layers
    "rnn_type": "LSTM",
    "num_rnn_layers": 5,
    "num_units_rnn_layer": 74,
    "dropout_rate_rnn": 0.0,
    # dense layers ("post" block)
    "num_den_layers_post": 13,
    "num_units_den_layers_post": 15,
    "activation_dense_post": "sigmoid",
    "dropout_rate_den_layers_post": 0,
    # optimisation
    "optimizers": "Adam",
    "learning_rate_exp": -3.0,
    "batch_size": 100,
}
In [ ]:
# Instantiate the model from the variable positions, two JSON config files
# (named mean_std_red / min_max_red) and the hyper-parameters defined above.
model = pm.HHModel(var_pos, '../config/mean_std_red.json', '../config/min_max_red.json', params)
# Forward pass on the first event only.
# NOTE(review): presumably done to create the layer variables before the weights are loaded — confirm.
model.call(X[0:1,:,:])
In [ ]:
# Compile with binary cross-entropy loss and the 'adam' optimizer;
# pm.sel_acc_2 is registered as a weighted metric.
model.compile(loss='binary_crossentropy',
optimizer='adam',
weighted_metrics=[pm.sel_acc_2])
In [ ]:
# Build the model using the shape of the full input array X.
model.build(X.shape)
# Uncomment to print the layer summary:
#model.summary()
In [ ]:
# Load the trained weights from the HDF5 file; by_name=True is passed to load_weights.
# NOTE(review): the weights file is tagged "par1" while the predictions are saved below as
# "pred_par0_v2" — confirm which parametrisation is intended.
model.load_weights('../python/training_17_04_2020_par1_best_weights.h5', by_name=True)
In [ ]:
# Run the model on all of X in batches of 100 (the same value as params["batch_size"]).
pred = model.predict(X, batch_size=100)
In [ ]:
# Save the predictions to disk; np.save appends the ".npy" extension to the given name.
np.save('pred_par0_v2', pred)
In [ ]:
# Call the ParametrizedModel version of sel_acc on all events with the model predictions.
# NOTE(review): the meaning of the trailing arguments (2, 2, True, True) is defined in
# ParametrizedModel.sel_acc — confirm there.
pm.sel_acc(Y, pred, 2, 2,True, True)#X[:,:,6])
In [ ]:
# Alias of `pred`; acc_prod reads the model output through this notebook-level name.
predictions = pred
In [ ]:
# ---------------------------------------------------------------------------
# 2016 signal samples
# ---------------------------------------------------------------------------

# VBF non-resonant samples:
#   VBFHHTo2B2Tau_CV_0_5_C2V_1_C3_1
#   VBFHHTo2B2Tau_CV_1_5_C2V_1_C3_1
#   VBFHHTo2B2Tau_CV_1_C2V_1_C3_0
#   VBFHHTo2B2Tau_CV_1_C2V_1_C3_1
#   VBFHHTo2B2Tau_CV_1_C2V_1_C3_2
#   VBFHHTo2B2Tau_CV_1_C2V_2_C3_1
# NOTE(review): presumably node value k refers to the k-th sample listed above — confirm.
nodes_values_2016_vbf = [1, 2, 3, 4, 5, 6]

# ggF non-resonant: display labels and the node values used for the selection.
nodes_label_2016 = ['SM', 'box', '2', '3', '4', '7', '9', '12']
nodes_values_2016 = [0, 1, 2, 9, 10, 11, 12, 13]

# ggF resonant Graviton: mass points.
mass_points_graviton_2016 = [
    250, 260, 270, 280, 300, 320, 340, 350,
    400, 450, 500, 550, 600, 650, 750, 800,
]

# ggF resonant Radion: mass points.
mass_points_radion_2016 = [
    250, 260, 270, 280, 300, 320, 340, 350,
    400, 450, 500, 550, 600, 650, 800, 900,
]
In [ ]:
# ---------------------------------------------------------------------------
# 2017 signal samples
# ---------------------------------------------------------------------------

# VBF non-resonant samples:
#   VBFHHTo2B2Tau_CV_1_5_C2V_1_C3_1
#   VBFHHTo2B2Tau_CV_1_C2V_1_C3_0
#   VBFHHTo2B2Tau_CV_1_C2V_1_C3_1
#   VBFHHTo2B2Tau_CV_1_C2V_1_C3_2
#   VBFHHTo2B2Tau_CV_1_C2V_2_C3_1
nodes_values_2017_vbf = [2, 3, 4, 5, 6]

# ggF non-resonant: display labels and the node values used for the selection.
nodes_label_2017 = ['SM', 2, 3, 4, 7, 9, 12]
nodes_values_2017 = [0, 2, 3, 4, 7, 9, 12]

# ggF resonant Graviton: mass points.
mass_points_graviton_2017 = [
    250, 260, 270, 280, 350, 400, 450, 550, 600, 650, 750, 800,
]

# ggF resonant Radion: mass points.
mass_points_radion_2017 = [
    250, 260, 270, 280, 300, 320, 350, 400, 450, 500,
    550, 600, 650, 700, 750, 800, 850, 900, 1000, 1250,
    1500, 1750, 2000, 2500, 3000,
]

# VBF resonant Graviton: mass points.
mass_points_graviton_vbf_2017 = [
    250, 260, 270, 280, 300, 320, 350, 400, 450, 500,
    600, 650, 700, 750, 850, 900, 1000, 1750, 2000,
]

# VBF resonant Radion: mass points.
mass_points_radion_vbf_2017 = [
    250, 270, 280, 300, 350, 400, 450, 500, 550, 600,
    650, 700, 750, 800, 850, 900, 1000, 1250, 1500, 1750,
    2000, 3000,
]
In [ ]:
# ---------------------------------------------------------------------------
# 2018 signal samples
# ---------------------------------------------------------------------------

# VBF non-resonant samples:
#   VBFHHTo2B2Tau_CV_0_5_C2V_1_C3_1
#   VBFHHTo2B2Tau_CV_1_5_C2V_1_C3_1
#   VBFHHTo2B2Tau_CV_1_C2V_1_C3_0
#   VBFHHTo2B2Tau_CV_1_C2V_1_C3_1
#   VBFHHTo2B2Tau_CV_1_C2V_1_C3_2
#   VBFHHTo2B2Tau_CV_1_C2V_2_C3_1
# NOTE(review): presumably node value k refers to the k-th sample listed above — confirm.
nodes_values_2018_vbf = [1, 2, 3, 4, 5, 6]

# ggF non-resonant: display labels and the node values used for the selection.
nodes_label_2018 = ['SM', 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
nodes_values_2018 = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

# ggF resonant Graviton: mass points.
mass_points_graviton_2018 = [
    250, 260, 270, 280, 300, 320, 350, 400, 450, 500,
    550, 600, 650, 700, 750, 800, 850, 900, 1000, 1250,
    1500, 1750, 2000, 2500, 3000,
]

# ggF resonant Radion: mass points.
mass_points_radion_2018 = [
    250, 260, 270, 280, 300, 320, 350, 400, 450, 500,
    550, 600, 650, 700, 750, 800, 850, 900, 1000, 1250,
    1500, 1750, 2000, 2500, 3000,
]

# VBF resonant Graviton: mass points.
mass_points_graviton_vbf_2018 = [
    250, 260, 270, 280, 300, 320, 350, 400, 450, 500,
    600, 650, 700, 750, 850, 900, 1000, 1200, 1750, 2000,
]

# VBF resonant Radion: mass points.
mass_points_radion_vbf_2018 = [
    250, 260, 270, 280, 300, 320, 350, 400, 450, 500,
    550, 600, 650, 700, 750, 800, 900, 1000, 1250, 1500,
    1750, 2000, 3000,
]
In [ ]:
def sel_acc(y_true, y_pred, n_positions, n_exp):
    """Fraction of events whose top-ranked candidates contain exactly ``n_exp`` true ones.

    For every event (first axis) the candidates along axis 1 are ranked by
    descending ``y_pred``; the ``y_true`` values of the ``n_positions``
    best-ranked candidates are summed, and the event counts as valid when that
    sum equals ``n_exp``.

    Returns:
        A 3-tuple of tensors ``(n_valid / n_evt, n_valid, n_evt)``, where
        ``n_valid`` and ``n_evt`` are float32.
    """
    n_evt = tf.shape(y_true)[0]
    evt_id = tf.range(n_evt)
    ranking = tf.argsort(y_pred, axis=1, direction='DESCENDING')

    # For each rank, pick y_true[event, candidate-at-that-rank] through (event, candidate) index pairs.
    picked = [
        tf.gather_nd(
            y_true,
            tf.stack([evt_id, tf.reshape(ranking[:, rank], shape=(n_evt,))], axis=1),
        )
        for rank in range(n_positions)
    ]
    n_true_selected = tf.add_n(picked)

    n_valid = tf.reduce_sum(tf.cast(tf.equal(n_true_selected, n_exp), tf.float32))
    n_total = tf.cast(n_evt, tf.float32)
    return n_valid / n_total, n_valid, n_total
def sel_acc_2(y_true, y_pred):
    """Call ``sel_acc`` for the two best-ranked candidates, requiring both to be true.

    Returns the same ``(ratio, n_valid, n_evt)`` tuple as ``sel_acc``.
    """
    top_two = 2
    return sel_acc(y_true, y_pred, top_two, top_two)
In [ ]:
# Notebook-level results store: InitializeDict adds one entry per name_tag.
score_info = {}
In [ ]:
def acc_prod(point, sample_type, spin, year, discr, channel, new_method, deep_csv, res):
    """Compute the two-candidate selection accuracy for each requested sample point.

    Events are selected with the notebook-level array ``Z`` (read as ``Z[:, 0, k]``):
    column 0 must equal ``sample_type``, column 4 ``year`` and column 5 ``channel``.
    For resonant samples (``res`` true) column 1 must also equal ``spin`` and
    column 2 the point value; otherwise column 3 must equal the point value and
    ``spin`` is ignored.

    The truth labels are ``Y[selected, :, 0]``. The score that is ranked depends
    on the flags:
      * ``new_method`` true             -> ``predictions[selected]`` (``deep_csv`` is ignored),
      * ``new_method`` false, ``deep_csv`` true  -> ``data[selected, :, -1]``,
      * both false                      -> ``X[selected, :, discr]``.

    Args:
        point: iterable of point values (node values or mass points) to evaluate.
        sample_type, spin, year, channel: values matched against ``Z`` as described above.
        discr: last-axis index into ``X`` used when both flags are false.
        new_method, deep_csv: score selection flags (evaluated by truthiness).
        res: true for resonant samples, false for non-resonant ones.

    Returns:
        Three lists of floats, one entry per point: the accuracy ratio, the
        number of valid events and the number of selected events, as returned
        by ``sel_acc_2``. If no event passes the selection the ratio is computed
        as ``n_valid / 0`` by ``sel_acc``.
    """
    # Part of the selection that does not depend on the point: build it once.
    common = (Z[:, 0, 0] == sample_type) & (Z[:, 0, 4] == year) & (Z[:, 0, 5] == channel)
    if res:
        common = common & (Z[:, 0, 1] == spin)
        point_column = 2
    else:
        point_column = 3

    acc = []
    valid = []
    n_evt_sample = []
    for value in point:
        selected = common & (Z[:, 0, point_column] == value)
        y_true = Y[selected, :, 0]
        if new_method:
            y_score = predictions[selected]
        elif deep_csv:
            y_score = data[selected, :, -1]
        else:
            y_score = X[selected, :, discr]
        ratio, n_valid, n_evt = sel_acc_2(y_true, y_score)
        acc.append(float(ratio.numpy()))
        valid.append(float(n_valid))
        n_evt_sample.append(float(n_evt))
    return acc, valid, n_evt_sample
In [ ]:
def InitializeDict(x, x_labels, sample_type, spin, year, discr , channel, res, fig_name, x_axis_label,sample_title, log_xscale, name_tag):
    """Evaluate the three tagging methods for one sample and store them in ``score_info``.

    ``acc_prod`` is called once per method, in this order:
      * "HH-btag": ``new_method=True,  deep_csv=False``
      * "DF":      ``new_method=False, deep_csv=False``
      * "DeepCSV": ``new_method=False, deep_csv=True``
    and its three returned lists are stored under the keys "acc", "valid" and
    "n_evt_sample" in ``score_info[name_tag][<method>]``.

    ``x_labels``, ``fig_name``, ``x_axis_label``, ``sample_title`` and
    ``log_xscale`` are accepted but not used by this function.

    Returns:
        The notebook-level ``score_info`` dict (updated in place).
    """
    method_flags = (
        ("HH-btag", True, False),
        ("DF", False, False),
        ("DeepCSV", False, True),
    )
    per_method = {}
    for method, use_new_method, use_deep_csv in method_flags:
        accuracies, n_valid, n_selected = acc_prod(
            x, sample_type, spin, year, discr, channel, use_new_method, use_deep_csv, res
        )
        per_method[method] = {"acc": accuracies, "valid": n_valid, "n_evt_sample": n_selected}
    score_info[name_tag] = per_method
    return score_info
In [ ]:
# Fill `score_info` for every channel, year and signal sample.
# InitializeDict arguments: x, x_labels, sample_type, spin, year, discr, channel, res,
#                           fig_name, x_axis_label, sample_title, log_xscale, name_tag
channel_name = ['ETau', 'MuTau', 'TauTau']

# Value passed as `discr`: acc_prod uses it as the last-axis index into X for the "DF" score.
discr_index = 6

# Settings shared by all years, one row per kind of sample:
#   kind -> (sample_type, spin, res, figure stem, x-axis label, title, log_xscale, tag stem)
# NOTE(review): the spin-2 "Graviton" samples are stored under "..._Res_R" tags and the spin-0
# "Radion" samples under "..._Res_G"; the letters look swapped. They are kept unchanged here
# because code reading score_info may rely on these keys — confirm before renaming.
sample_kinds = {
    'ggf_nonres':   (st.ggHH_NonRes,  0, False, 'non_res',       'node',       'Non Res ggF',  False, 'ggHH_NonRes'),
    'vbf_nonres':   (st.VBFHH_NonRes, 0, False, 'non_res_vbf_',  'node',       'Non Res VBF',  False, 'VBFHH_NonRes'),
    'ggf_graviton': (st.ggHH_Res,     2, True,  'graviton_',     'mass [GeV]', 'Graviton',     True,  'ggHH_Res_R'),
    'ggf_radion':   (st.ggHH_Res,     0, True,  'radion_',       'mass [GeV]', 'Radion',       True,  'ggHH_Res_G'),
    'vbf_graviton': (st.VBFHH_Res,    2, True,  'VBF_graviton_', 'mass [GeV]', 'VBF Graviton', False, 'VBFHH_Res_R'),
    'vbf_radion':   (st.VBFHH_Res,    0, True,  'VBF_radion_',   'mass [GeV]', 'VBF Radion',   False, 'VBFHH_Res_G'),
}

# Points available per year, in the order in which they are evaluated.
samples_by_year = [
    (2016, [
        ('ggf_nonres', nodes_values_2016),
        ('vbf_nonres', nodes_values_2016_vbf),
        ('ggf_graviton', mass_points_graviton_2016),
        ('ggf_radion', mass_points_radion_2016),
    ]),
    (2017, [
        ('ggf_nonres', nodes_values_2017),
        ('vbf_nonres', nodes_values_2017_vbf),
        ('ggf_graviton', mass_points_graviton_2017),
        ('ggf_radion', mass_points_radion_2017),
        ('vbf_graviton', mass_points_graviton_vbf_2017),
        ('vbf_radion', mass_points_radion_vbf_2017),
    ]),
    (2018, [
        ('ggf_nonres', nodes_values_2018),
        ('vbf_nonres', nodes_values_2018_vbf),
        ('ggf_graviton', mass_points_graviton_2018),
        ('ggf_radion', mass_points_radion_2018),
        ('vbf_graviton', mass_points_graviton_vbf_2018),
        ('vbf_radion', mass_points_radion_vbf_2018),
    ]),
]

for channel, channel_label in enumerate(channel_name):
    for year, year_samples in samples_by_year:
        for kind, points in year_samples:
            sample_type, spin, res, fig_stem, x_axis_label, title, log_xscale, tag_stem = sample_kinds[kind]
            # Every tag now follows "<tag stem>_<channel index>_<year>"; the 2016 VBF non-resonant
            # tag previously lacked the underscore before the channel index.
            InitializeDict(
                points, points, sample_type, spin, year, discr_index, channel, res,
                'plots/{}/{}{}.pdf'.format(year, fig_stem, channel_label),
                x_axis_label,
                '{} {}'.format(title, channel_label),
                log_xscale,
                '{}_{}_{}'.format(tag_stem, channel, year),
            )