In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import h5py
from matplotlib.backends.backend_pdf import PdfPages
import statsmodels.stats.proportion

In [2]:
import sys
sys.path.insert(0, "../python")
import InputsProducer
import ParametrizedModel as pm
from InputsProducer import SampleType as st


Welcome to JupyROOT 6.18/02

In [3]:
# Glob pattern of the ROOT files to read. As written it only matches 2016
# ggF Radion files ("GluGluToRadionToHHTo2B2Tau ... 2016").
# NOTE(review): later cells also look up 2017/2018, graviton and VBF samples;
# with this pattern those selections are presumably empty - confirm, or switch
# to the broader pattern kept below.
file = '/data/dido/new_samples/NN_samples/GluGluToRadionToHHTo2B2Tau_M-*_*Tau_2016_*.root'
# file = '/data/dido/samples_23_10/*.root'

In [4]:
%%time
# Read the ROOT files matched by `file` and derive the arrays used below:
#   X - network inputs, Y - truth labels, Z - per-event sample metadata
#   (Z is indexed as Z[:, 0, k] by acc_prod further down).
# NOTE(review): the meaning of the positional arguments (0, True, True) is
# defined in InputsProducer.CreateRootDF and is not visible here - confirm.
data = InputsProducer.CreateRootDF(file, 0, True, True)
X, Y, Z, var_pos, var_pos_z, var_name = InputsProducer.CreateXY(data, '../config/training_variables.json' )
# Optional caching of the inputs (disabled).
# np.save('X_par0', X)
# np.save('Y_par0', Y)


CPU times: user 21.3 s, sys: 470 ms, total: 21.7 s
Wall time: 25.9 s

In [5]:
# Hyper-parameters handed to pm.HHModel below.
# NOTE(review): how each key is interpreted is defined in ParametrizedModel,
# not in this notebook - the values are presumably those of the training whose
# weights are loaded later; confirm they match the weights file.
params =  {"activation_dense_post": "sigmoid", "activation_dense_pre": "sigmoid", "dropout_rate_den_layers_post": 0, 
           "dropout_rate_den_layers_pre": 0, "dropout_rate_rnn": 0.0, "learning_rate_exp": -3.0,
           "num_den_layers_post": 13, "num_den_layers_pre": 0, "num_rnn_layers": 5, "num_units_den_layers_post": 15, 
           "num_units_den_layers_pre": 1, "num_units_rnn_layer": 74, "optimizers": "Adam", "rnn_type": "LSTM", 
           "batch_size": 100}

In [6]:
# Instantiate the model from the variable positions, two normalisation config
# files and the hyper-parameters above.
model = pm.HHModel(var_pos, '../config/mean_std_red.json', '../config/min_max_red.json', params)
# Run a single event through the network as a smoke test; the result is the
# cell output.
# NOTE(review): this invokes `call` directly instead of `model(...)` -
# presumably to create the layers before the weights are loaded; confirm.
model.call(X[0:1,:,:])


Out[6]:
<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[0.8321006, 0.8321006, 0.8321006, 0.8321006, 0.       , 0.       ,
        0.       , 0.       , 0.       , 0.       ]], dtype=float32)>
2020-04-21 11:34:56.418383: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-04-21 11:34:56.443769: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-04-21 11:34:56.444323: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: GeForce GTX 1060 3GB computeCapability: 6.1
coreClock: 1.7845GHz coreCount: 9 deviceMemorySize: 2.95GiB deviceMemoryBandwidth: 178.99GiB/s
2020-04-21 11:34:56.444612: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-04-21 11:34:56.489685: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-04-21 11:34:56.515226: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-04-21 11:34:56.532167: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-04-21 11:34:56.557049: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-04-21 11:34:56.560186: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-04-21 11:34:56.560565: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcudnn.so.7'; dlerror: libcudnn.so.7: cannot open shared object file: No such file or directory
2020-04-21 11:34:56.560609: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1592] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2020-04-21 11:34:56.562381: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2020-04-21 11:34:56.583453: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3000000000 Hz
2020-04-21 11:34:56.583999: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x19c59e10 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-04-21 11:34:56.584047: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2020-04-21 11:34:56.863556: W tensorflow/compiler/xla/service/platform_util.cc:276] unable to create StreamExecutor for CUDA:0: failed initializing StreamExecutor for CUDA device ordinal 0: Internal: failed call to cuDevicePrimaryCtxRetain: CUDA_ERROR_OUT_OF_MEMORY: out of memory; total memory reported: 3164012544
2020-04-21 11:34:56.879106: I tensorflow/compiler/jit/xla_gpu_device.cc:136] Ignoring visible XLA_GPU_JIT device. Device number is 0, reason: Internal: no supported devices found for platform CUDA
2020-04-21 11:34:56.879504: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-04-21 11:34:56.879561: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102]      

In [7]:
# Compile with binary cross-entropy and register pm.sel_acc_2 as a weighted metric.
# The optimizer is given here as the literal string 'adam'; params["optimizers"]
# and params["learning_rate_exp"] are not used at this call site.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              weighted_metrics=[pm.sel_acc_2])

In [8]:
# Build the model for the full input shape of X before loading weights.
model.build(X.shape)

In [9]:
# Restore trained weights from an HDF5 file, matching layers by name.
# NOTE(review): with by_name=True layers whose names do not match are
# presumably skipped without an error - verify that every layer was restored.
model.load_weights('../python/training_17_04_2020_par1_best_weights.h5', by_name=True)

In [10]:
# Network scores for every event in X; `predictions` is read as a global by
# acc_prod below. The batch size repeats the value of params["batch_size"].
predictions = model.predict(X, batch_size=100) 
# Optional caching of the scores (disabled).
# np.save('pred_par1', predictions)


2020-04-21 11:35:05.223314: W tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 39202800 exceeds 10% of system memory.

In [11]:
# Overall selection accuracy on the whole sample, using the implementation from
# ParametrizedModel (not the 4-argument sel_acc defined later in this notebook).
# NOTE(review): the meaning of the two trailing booleans is defined in
# ParametrizedModel.sel_acc and is not visible here - confirm.
pm.sel_acc(Y, predictions, 2, 2,True, True)


Out[11]:
0.9341578

In [ ]:
# 2016
# Scan points available per sample family. The *_values / mass_points_* lists
# are the values compared against the Z metadata in acc_prod.
# VBF non res
# VBFHHTo2B2Tau_CV_0_5_C2V_1_C3_1
# VBFHHTo2B2Tau_CV_1_5_C2V_1_C3_1
# VBFHHTo2B2Tau_CV_1_C2V_1_C3_0
# VBFHHTo2B2Tau_CV_1_C2V_1_C3_1
# VBFHHTo2B2Tau_CV_1_C2V_1_C3_2
# VBFHHTo2B2Tau_CV_1_C2V_2_C3_1
# NOTE(review): presumably value i refers to the i-th sample listed above - confirm.
nodes_values_2016_vbf = [1,2,3,4,5,6]

# ggF non res
# NOTE(review): labels are strings here but integers in the 2017/2018 cells, and
# the labels do not obviously line up with the values below - confirm the mapping.
nodes_label_2016 = ['SM', 'box', '2','3','4','7','9','12']
nodes_values_2016 = [0,1,2,9,10,11,12,13]

# ggF res Graviton
mass_points_graviton_2016 = [250, 260, 270,280,300,320,340,350,400,450,500,550,600,650,750,800]
# ggF res Radion
mass_points_radion_2016 = [250, 260, 270,280,300,320,340,350,400,450,500,550,600,650,800, 900]

In [ ]:
# 2017
# Scan points available per sample family. The *_values / mass_points_* lists
# are the values compared against the Z metadata in acc_prod.
# VBF non res
# VBFHHTo2B2Tau_CV_1_5_C2V_1_C3_1
# VBFHHTo2B2Tau_CV_1_C2V_1_C3_0
# VBFHHTo2B2Tau_CV_1_C2V_1_C3_1
# VBFHHTo2B2Tau_CV_1_C2V_1_C3_2
# VBFHHTo2B2Tau_CV_1_C2V_2_C3_1
# NOTE(review): five samples are listed and the values start at 2 - presumably
# the same numbering as in 2016/2018 with the CV_0_5 sample missing; confirm.
nodes_values_2017_vbf = [2,3,4,5,6]

# ggF non res
nodes_label_2017 = ['SM', 2,3,4,7,9,12]
nodes_values_2017 = [0,2,3,4,7,9,12]

# ggF res Graviton
mass_points_graviton_2017 = [250,260,270,280,350,400,450,550,600,650,750,800]
# ggF res Radion
mass_points_radion_2017 = [250,260,270,280,300,320,350,400,450,500,550,600,650,700,750,800,850,900,1000,1250,1500,1750,2000,2500,3000]

# VBF res Graviton
mass_points_graviton_vbf_2017 = [250,260,270,280,300,320,350,400,450,500,600,650,700,750,850,900,1000,1750,2000]
# VBF res Radion
mass_points_radion_vbf_2017 = [250,270,280,300,350,400,450,500,550,600,650,700,750,800,850,900,1000,1250,1500,1750,2000,3000]

In [ ]:
# 2018
# Scan points available per sample family. The *_values / mass_points_* lists
# are the values compared against the Z metadata in acc_prod.
# VBF non res
# VBFHHTo2B2Tau_CV_0_5_C2V_1_C3_1
# VBFHHTo2B2Tau_CV_1_5_C2V_1_C3_1
# VBFHHTo2B2Tau_CV_1_C2V_1_C3_0
# VBFHHTo2B2Tau_CV_1_C2V_1_C3_1
# VBFHHTo2B2Tau_CV_1_C2V_1_C3_2
# VBFHHTo2B2Tau_CV_1_C2V_2_C3_1

# NOTE(review): presumably value i refers to the i-th sample listed above - confirm.
nodes_values_2018_vbf = [1,2,3,4,5,6]


# ggF non res
nodes_label_2018 = ['SM',2,3,4,5,6,7,8,9,10,11,12]
nodes_values_2018 = [0,2,3,4,5,6,7,8,9,10,11,12]

# ggF res Graviton
mass_points_graviton_2018 = [250,260,270,280,300,320,350,400,450,500,550,600,650,700,750,800,850,900,1000,1250,1500,1750,2000,2500,3000]
# ggF res Radion
mass_points_radion_2018 = [250,260,270,280,300,320,350,400,450,500,550,600,650,700,750,800,850,900,1000,1250,1500,1750,2000,2500,3000]

# VBF res Graviton
mass_points_graviton_vbf_2018 = [250,260,270,280,300,320,350,400,450,500,600,650,700,750,850,900,1000,1200,1750,2000]
# VBF res Radion
mass_points_radion_vbf_2018 = [250,260,270,280,300,320,350,400,450,500,550,600,650,700,750,800,900,1000,1250,1500,1750,2000,3000]

In [ ]:
def sel_acc(y_true, y_pred, n_positions, n_exp):
    """Fraction of events whose top-ranked candidates carry the expected truth sum.

    For every event (row) the candidates are ranked by descending ``y_pred``.
    The ``y_true`` values of the ``n_positions`` best-ranked candidates are
    summed, and the event counts as valid when that sum equals ``n_exp``.

    Args:
        y_true: 2-D truth array/tensor, one row per event.
        y_pred: 2-D score array/tensor with the same layout as ``y_true``.
        n_positions: number of top-ranked candidates to inspect (Python int).
        n_exp: truth sum required for an event to count as valid.

    Returns:
        Tuple ``(ratio, n_valid, n_evt)`` of float32 scalar tensors: the valid
        fraction, the number of valid events and the total number of events.
    """
    ranking = tf.argsort(y_pred, axis=1, direction='DESCENDING')
    n_evt = tf.shape(y_true)[0]
    rows = tf.range(n_evt)

    def truth_at_rank(rank):
        # (row, column) pairs that pick, per event, the candidate ranked `rank`.
        cols = tf.reshape(ranking[:, rank], shape=(n_evt,))
        pairs = tf.stack([rows, cols], axis=1)
        return tf.gather_nd(y_true, pairs)

    hits = tf.add_n([truth_at_rank(rank) for rank in range(n_positions)])
    n_valid = tf.reduce_sum(tf.cast(tf.equal(hits, n_exp), tf.float32))
    n_total = tf.cast(n_evt, tf.float32)
    return n_valid / n_total, n_valid, n_total

def sel_acc_2(y_true, y_pred):
    """Selection accuracy requiring both of the two best-ranked candidates to be true.

    Thin wrapper around ``sel_acc`` with ``n_positions=2`` and ``n_exp=2``;
    returns the same ``(ratio, n_valid, n_evt)`` tuple.
    """
    return sel_acc(y_true, y_pred, n_positions=2, n_exp=2)

In [ ]:
# Global accumulator: name_tag -> per-method results. Filled by InitializeDict
# and written to JSON in the last cell. Re-running this cell resets it.
score_info = {}

In [ ]:
def acc_prod(point, sample_type, spin, year, discr, channel, new_method, deep_csv, res):
    """Compute the selection accuracy for every scan point of one sample.

    Events are selected through the notebook-level metadata array ``Z``, read
    as ``Z[:, 0, k]`` with k = 0 sample type, 1 spin, 2 point value for
    resonant samples, 3 point value for non-resonant samples, 4 year and
    5 channel. For each point the truth labels ``Y[mask, :, 0]`` are compared
    with one of three scores via ``sel_acc_2``.

    Reads the globals ``Z``, ``Y``, ``X``, ``predictions`` and ``data``.

    Args:
        point: iterable of scan values (mass points or node values).
        sample_type: value matched against ``Z[:, 0, 0]``.
        spin: value matched against ``Z[:, 0, 1]``; only used when ``res`` is true.
        year: value matched against ``Z[:, 0, 4]``.
        discr: index on the last axis of ``X`` used as score when neither
            ``new_method`` nor ``deep_csv`` is set.
        channel: value matched against ``Z[:, 0, 5]``.
        new_method: if true, score with the network output ``predictions``.
        deep_csv: if true (and ``new_method`` is false), score with the last
            entry on the last axis of ``data``.
        res: true for resonant samples (select on spin and on column 2),
            false for non-resonant samples (select on column 3).

    Returns:
        Three lists of floats, one entry per value in ``point``:
        ``(acc, valid, n_evt_sample)``.
        NOTE(review): a point that matches no event presumably yields a 0/0
        accuracy (NaN) from sel_acc - confirm and filter downstream if needed.
    """
    # Part of the selection that does not depend on the scan point.
    common = (Z[:, 0, 0] == sample_type) & (Z[:, 0, 4] == year) & (Z[:, 0, 5] == channel)
    if res:
        common = common & (Z[:, 0, 1] == spin)
        point_column = 2
    else:
        point_column = 3

    acc = []
    valid = []
    n_evt_sample = []
    for value in point:
        mask = common & (Z[:, 0, point_column] == value)
        y_true = Y[mask, :, 0]

        # Exactly one score source is always chosen, so y_pred cannot be unbound.
        if new_method:
            y_pred = predictions[mask]
        elif deep_csv:
            y_pred = data[mask, :, -1]
        else:
            y_pred = X[mask, :, discr]

        ratio, n_valid, n_evt = sel_acc_2(y_true, y_pred)
        acc.append(float(ratio.numpy()))
        valid.append(float(n_valid))
        n_evt_sample.append(float(n_evt))
    return acc, valid, n_evt_sample

In [ ]:
def InitializeDict(x, x_labels, sample_type, spin, year, discr, channel, res, fig_name, x_axis_label, sample_title, log_xscale, name_tag):
    """Evaluate the three scoring methods for one sample and store the result.

    Calls ``acc_prod`` once per method and writes the outcome into the global
    ``score_info`` under ``name_tag``, as
    ``{"HH-btag" | "DF" | "DeepCSV": {"acc", "valid", "n_evt_sample"}}``.

    ``x_labels``, ``fig_name``, ``x_axis_label``, ``sample_title`` and
    ``log_xscale`` are accepted but not used by this function.

    Returns:
        The global ``score_info`` dictionary (mutated in place).
    """
    # (result key, new_method flag, deep_csv flag) in the order they are evaluated.
    methods = (
        ("HH-btag", True, False),
        ("DF", False, False),
        ("DeepCSV", False, True),
    )

    entry = {}
    for label, new_method, deep_csv in methods:
        acc, valid, n_evt_sample = acc_prod(x, sample_type, spin, year, discr, channel, new_method, deep_csv, res)
        entry[label] = {"acc": acc, "valid": valid, "n_evt_sample": n_evt_sample}

    score_info[name_tag] = entry
    return score_info

In [ ]:
# Fill score_info for every channel / year / sample combination.
# InitializeDict args: x, x_labels, sample_type, spin, year, discr, channel, res,
#                      fig_name, x_axis_label, sample_title, log_xscale, name_tag
channel_name = ['ETau', 'MuTau', 'TauTau']

# Index on the last axis of X that acc_prod uses as the "DF" score.
discr_index = 6

# One row per sample:
# (points, sample_type, spin, year, res, fig_name, x_axis_label, sample_title, log_xscale, name_tag)
# fig_name and sample_title are formatted with the channel name, name_tag with
# the channel index.
# Tag convention: "_G_" = graviton (spin 2), "_R_" = radion (spin 0). The
# original tags had the two letters swapped, and the 2016 VBF non-resonant tag
# lacked the underscore before the channel index.
# NOTE(review): this changes the keys written to the JSON file - update any
# reader that relied on the old keys.
sample_table = [
    # 2016
    (nodes_values_2016, st.ggHH_NonRes, 0, 2016, False, 'plots/2016/non_res{}.pdf', 'node', 'Non Res ggF {}', False, 'ggHH_NonRes_{}_2016'),
    (nodes_values_2016_vbf, st.VBFHH_NonRes, 0, 2016, False, 'plots/2016/non_res_vbf_{}.pdf', 'node', 'Non Res VBF {}', False, 'VBFHH_NonRes_{}_2016'),
    (mass_points_graviton_2016, st.ggHH_Res, 2, 2016, True, 'plots/2016/graviton_{}.pdf', 'mass [GeV]', 'Graviton {}', True, 'ggHH_Res_G_{}_2016'),
    (mass_points_radion_2016, st.ggHH_Res, 0, 2016, True, 'plots/2016/radion_{}.pdf', 'mass [GeV]', 'Radion {}', True, 'ggHH_Res_R_{}_2016'),
    # 2017
    (nodes_values_2017, st.ggHH_NonRes, 0, 2017, False, 'plots/2017/non_res{}.pdf', 'node', 'Non Res ggF {}', False, 'ggHH_NonRes_{}_2017'),
    (nodes_values_2017_vbf, st.VBFHH_NonRes, 0, 2017, False, 'plots/2017/non_res_vbf_{}.pdf', 'node', 'Non Res VBF {}', False, 'VBFHH_NonRes_{}_2017'),
    (mass_points_graviton_2017, st.ggHH_Res, 2, 2017, True, 'plots/2017/graviton_{}.pdf', 'mass [GeV]', 'Graviton {}', True, 'ggHH_Res_G_{}_2017'),
    (mass_points_radion_2017, st.ggHH_Res, 0, 2017, True, 'plots/2017/radion_{}.pdf', 'mass [GeV]', 'Radion {}', True, 'ggHH_Res_R_{}_2017'),
    (mass_points_graviton_vbf_2017, st.VBFHH_Res, 2, 2017, True, 'plots/2017/VBF_graviton_{}.pdf', 'mass [GeV]', 'VBF Graviton {}', False, 'VBFHH_Res_G_{}_2017'),
    (mass_points_radion_vbf_2017, st.VBFHH_Res, 0, 2017, True, 'plots/2017/VBF_radion_{}.pdf', 'mass [GeV]', 'VBF Radion {}', False, 'VBFHH_Res_R_{}_2017'),
    # 2018
    (nodes_values_2018, st.ggHH_NonRes, 0, 2018, False, 'plots/2018/non_res{}.pdf', 'node', 'Non Res ggF {}', False, 'ggHH_NonRes_{}_2018'),
    (nodes_values_2018_vbf, st.VBFHH_NonRes, 0, 2018, False, 'plots/2018/non_res_vbf_{}.pdf', 'node', 'Non Res VBF {}', False, 'VBFHH_NonRes_{}_2018'),
    (mass_points_graviton_2018, st.ggHH_Res, 2, 2018, True, 'plots/2018/graviton_{}.pdf', 'mass [GeV]', 'Graviton {}', True, 'ggHH_Res_G_{}_2018'),
    (mass_points_radion_2018, st.ggHH_Res, 0, 2018, True, 'plots/2018/radion_{}.pdf', 'mass [GeV]', 'Radion {}', True, 'ggHH_Res_R_{}_2018'),
    (mass_points_graviton_vbf_2018, st.VBFHH_Res, 2, 2018, True, 'plots/2018/VBF_graviton_{}.pdf', 'mass [GeV]', 'VBF Graviton {}', False, 'VBFHH_Res_G_{}_2018'),
    (mass_points_radion_vbf_2018, st.VBFHH_Res, 0, 2018, True, 'plots/2018/VBF_radion_{}.pdf', 'mass [GeV]', 'VBF Radion {}', False, 'VBFHH_Res_R_{}_2018'),
]

for channel, ch_name in enumerate(channel_name):
    for points, sample_type, spin, year, res, fig_name, x_axis_label, sample_title, log_xscale, name_tag in sample_table:
        InitializeDict(points, points, sample_type, spin, year, discr_index, channel, res,
                       fig_name.format(ch_name), x_axis_label, sample_title.format(ch_name),
                       log_xscale, name_tag.format(channel))

In [ ]:
# Persist the collected scores as pretty-printed JSON in the working directory.
with open('score_par_1.json', 'w') as out_file:
    serialized = json.dumps(score_info, indent=4)
    out_file.write(serialized)