Preparing data


In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

plt.style.use('ggplot')

import numpy as np
import pandas as pd
import _pickle as pickle
import os

import re

In [2]:
# Load the "data" table from the JetHT HDF5 store into a DataFrame.
# Its columns (see the summary in the next cell) include runId, lumiId,
# isSig and one column per subsystem.
sublabels = pd.read_hdf(
    '//data/cms2010/data_not_f_JetHT.hdf5',
    key="data",
)

In [3]:
# Per-column mean of the sub-label table.
# Use DataFrame.mean() rather than np.mean(frame): NumPy forwards axis=None to
# pandas, which newer pandas versions interpret as "reduce over the whole
# frame" and return a single scalar. DataFrame.mean() reduces along the rows
# and yields one value per column on every pandas version.
sublabels.mean()


Out[3]:
runId       279241.534327
lumiId         694.025505
isSig            0.974962
Csc              0.998577
Dt               0.998577
Ecal             0.996484
Egamma           0.995129
Es               0.998577
Hcal             0.998000
Hlt              0.998521
Jetmet           0.995846
L1tcalo          0.983631
L1tmu            0.998245
Lumi             0.998577
Muon             0.995613
Pix              0.997570
Rpc              0.995006
Strip            0.996619
Track            0.995613
new_json         0.985005
dtype: float64

In [4]:
# Feature table: the "data" key of the JetHT feature store.
data_features = pd.read_hdf(
    '/home/olgako/data/data_features_JetHT.hdf5',
    key="data",
)

# Labels are stored under the 'labels' key and flipped here (1 - value).
# NOTE(review): presumably this turns a 0/1 "good" flag into a 0/1
# "anomalous" flag -- confirm against the file that produced the labels.
labels = 1 - pd.read_hdf(
    '/home/olgako/data/labels_JetHT.hdf5',
    key='labels',
)

In [5]:
# Column labels of the feature table; the feature groups in the following
# cells are picked out of this collection by name prefix.
all_features = data_features.columns

In [6]:
# Every column whose name begins with 'qMu'.
# Columns starting with 'qMuCosm' share that prefix and are kept as well
# (an extra exclusion on characters 3..7 == 'Cosm' is intentionally not applied).
Muon_features = [name for name in all_features if name.startswith('qMu')]

# Show how many columns were selected, followed by the names themselves.
(len(Muon_features), Muon_features)


Out[6]:
(439,
 ['qMu0En_0',
  'qMu0En_1',
  'qMu0En_2',
  'qMu0En_3',
  'qMu0En_4',
  'qMu0En_5',
  'qMu0En_6',
  'qMu0Eta_0',
  'qMu0Eta_1',
  'qMu0Eta_2',
  'qMu0Eta_3',
  'qMu0Eta_4',
  'qMu0Eta_5',
  'qMu0Eta_6',
  'qMu0Phi_0',
  'qMu0Phi_1',
  'qMu0Phi_2',
  'qMu0Phi_3',
  'qMu0Phi_4',
  'qMu0Phi_5',
  'qMu0Phi_6',
  'qMu0Pt_0',
  'qMu0Pt_1',
  'qMu0Pt_2',
  'qMu0Pt_3',
  'qMu0Pt_4',
  'qMu0Pt_5',
  'qMu0Pt_6',
  'qMu1En_0',
  'qMu1En_1',
  'qMu1En_3',
  'qMu1En_4',
  'qMu1En_5',
  'qMu1En_6',
  'qMu1Eta_0',
  'qMu1Eta_1',
  'qMu1Eta_2',
  'qMu1Eta_3',
  'qMu1Eta_4',
  'qMu1Eta_5',
  'qMu1Eta_6',
  'qMu1Phi_0',
  'qMu1Phi_1',
  'qMu1Phi_2',
  'qMu1Phi_3',
  'qMu1Phi_4',
  'qMu1Phi_5',
  'qMu1Phi_6',
  'qMu1Pt_0',
  'qMu1Pt_1',
  'qMu1Pt_3',
  'qMu1Pt_4',
  'qMu1Pt_5',
  'qMu1Pt_6',
  'qMu2En_0',
  'qMu2En_1',
  'qMu2En_3',
  'qMu2En_4',
  'qMu2En_5',
  'qMu2En_6',
  'qMu2Eta_0',
  'qMu2Eta_1',
  'qMu2Eta_2',
  'qMu2Eta_3',
  'qMu2Eta_4',
  'qMu2Eta_5',
  'qMu2Eta_6',
  'qMu2Phi_0',
  'qMu2Phi_1',
  'qMu2Phi_2',
  'qMu2Phi_3',
  'qMu2Phi_4',
  'qMu2Phi_5',
  'qMu2Phi_6',
  'qMu2Pt_0',
  'qMu2Pt_1',
  'qMu2Pt_3',
  'qMu2Pt_4',
  'qMu2Pt_5',
  'qMu2Pt_6',
  'qMu3En_0',
  'qMu3En_1',
  'qMu3En_3',
  'qMu3En_4',
  'qMu3En_5',
  'qMu3En_6',
  'qMu3Eta_0',
  'qMu3Eta_1',
  'qMu3Eta_2',
  'qMu3Eta_3',
  'qMu3Eta_4',
  'qMu3Eta_5',
  'qMu3Eta_6',
  'qMu3Phi_0',
  'qMu3Phi_1',
  'qMu3Phi_2',
  'qMu3Phi_3',
  'qMu3Phi_4',
  'qMu3Phi_5',
  'qMu3Phi_6',
  'qMu3Pt_0',
  'qMu3Pt_1',
  'qMu3Pt_3',
  'qMu3Pt_4',
  'qMu3Pt_5',
  'qMu3Pt_6',
  'qMu4En_0',
  'qMu4En_1',
  'qMu4En_3',
  'qMu4En_4',
  'qMu4En_5',
  'qMu4En_6',
  'qMu4Eta_0',
  'qMu4Eta_1',
  'qMu4Eta_2',
  'qMu4Eta_3',
  'qMu4Eta_4',
  'qMu4Eta_5',
  'qMu4Eta_6',
  'qMu4Phi_0',
  'qMu4Phi_1',
  'qMu4Phi_2',
  'qMu4Phi_3',
  'qMu4Phi_4',
  'qMu4Phi_5',
  'qMu4Phi_6',
  'qMu4Pt_0',
  'qMu4Pt_1',
  'qMu4Pt_3',
  'qMu4Pt_4',
  'qMu4Pt_5',
  'qMu4Pt_6',
  'qMu5En_0',
  'qMu5En_1',
  'qMu5En_3',
  'qMu5En_4',
  'qMu5En_5',
  'qMu5En_6',
  'qMu5Eta_0',
  'qMu5Eta_1',
  'qMu5Eta_2',
  'qMu5Eta_3',
  'qMu5Eta_4',
  'qMu5Eta_5',
  'qMu5Eta_6',
  'qMu5Phi_0',
  'qMu5Phi_1',
  'qMu5Phi_2',
  'qMu5Phi_3',
  'qMu5Phi_4',
  'qMu5Phi_5',
  'qMu5Phi_6',
  'qMu5Pt_0',
  'qMu5Pt_1',
  'qMu5Pt_3',
  'qMu5Pt_4',
  'qMu5Pt_5',
  'qMu5Pt_6',
  'qMuCh__0',
  'qMuCh__1',
  'qMuCh__4',
  'qMuCosm0En_0',
  'qMuCosm0En_1',
  'qMuCosm0En_3',
  'qMuCosm0En_4',
  'qMuCosm0En_5',
  'qMuCosm0En_6',
  'qMuCosm0Eta_0',
  'qMuCosm0Eta_1',
  'qMuCosm0Eta_2',
  'qMuCosm0Eta_3',
  'qMuCosm0Eta_5',
  'qMuCosm0Eta_6',
  'qMuCosm0Phi_0',
  'qMuCosm0Phi_1',
  'qMuCosm0Phi_2',
  'qMuCosm0Phi_3',
  'qMuCosm0Phi_4',
  'qMuCosm0Phi_6',
  'qMuCosm0Pt_0',
  'qMuCosm0Pt_1',
  'qMuCosm0Pt_3',
  'qMuCosm0Pt_4',
  'qMuCosm0Pt_5',
  'qMuCosm0Pt_6',
  'qMuCosm1En_0',
  'qMuCosm1En_1',
  'qMuCosm1En_5',
  'qMuCosm1En_6',
  'qMuCosm1Eta_0',
  'qMuCosm1Eta_1',
  'qMuCosm1Eta_2',
  'qMuCosm1Eta_5',
  'qMuCosm1Eta_6',
  'qMuCosm1Phi_0',
  'qMuCosm1Phi_1',
  'qMuCosm1Phi_2',
  'qMuCosm1Phi_3',
  'qMuCosm1Phi_6',
  'qMuCosm1Pt_0',
  'qMuCosm1Pt_1',
  'qMuCosm1Pt_5',
  'qMuCosm1Pt_6',
  'qMuCosm2En_0',
  'qMuCosm2En_1',
  'qMuCosm2En_5',
  'qMuCosm2En_6',
  'qMuCosm2Eta_0',
  'qMuCosm2Eta_1',
  'qMuCosm2Eta_2',
  'qMuCosm2Eta_6',
  'qMuCosm2Phi_0',
  'qMuCosm2Phi_1',
  'qMuCosm2Phi_2',
  'qMuCosm2Phi_6',
  'qMuCosm2Pt_0',
  'qMuCosm2Pt_1',
  'qMuCosm2Pt_5',
  'qMuCosm2Pt_6',
  'qMuCosm3En_0',
  'qMuCosm3En_1',
  'qMuCosm3En_6',
  'qMuCosm3Eta_0',
  'qMuCosm3Eta_1',
  'qMuCosm3Eta_2',
  'qMuCosm3Eta_6',
  'qMuCosm3Phi_0',
  'qMuCosm3Phi_1',
  'qMuCosm3Phi_2',
  'qMuCosm3Phi_6',
  'qMuCosm3Pt_0',
  'qMuCosm3Pt_1',
  'qMuCosm3Pt_6',
  'qMuCosm4En_0',
  'qMuCosm4En_1',
  'qMuCosm4En_6',
  'qMuCosm4Eta_0',
  'qMuCosm4Eta_1',
  'qMuCosm4Eta_2',
  'qMuCosm4Eta_6',
  'qMuCosm4Phi_0',
  'qMuCosm4Phi_1',
  'qMuCosm4Phi_2',
  'qMuCosm4Phi_6',
  'qMuCosm4Pt_0',
  'qMuCosm4Pt_1',
  'qMuCosm4Pt_6',
  'qMuCosm5En_0',
  'qMuCosm5En_1',
  'qMuCosm5En_6',
  'qMuCosm5Eta_0',
  'qMuCosm5Eta_1',
  'qMuCosm5Eta_2',
  'qMuCosm5Eta_6',
  'qMuCosm5Phi_0',
  'qMuCosm5Phi_1',
  'qMuCosm5Phi_2',
  'qMuCosm5Phi_6',
  'qMuCosm5Pt_0',
  'qMuCosm5Pt_1',
  'qMuCosm5Pt_6',
  'qMuCosmCh__0',
  'qMuCosmCh__1',
  'qMuCosmCh__4',
  'qMuCosmEn__0',
  'qMuCosmEn__1',
  'qMuCosmEn__2',
  'qMuCosmEn__3',
  'qMuCosmEn__4',
  'qMuCosmEn__5',
  'qMuCosmEn__6',
  'qMuCosmEta_0',
  'qMuCosmEta_1',
  'qMuCosmEta_2',
  'qMuCosmEta_3',
  'qMuCosmEta_4',
  'qMuCosmEta_5',
  'qMuCosmEta_6',
  'qMuCosmLeg0En_0',
  'qMuCosmLeg0En_1',
  'qMuCosmLeg0En_5',
  'qMuCosmLeg0En_6',
  'qMuCosmLeg0Eta_0',
  'qMuCosmLeg0Eta_1',
  'qMuCosmLeg0Eta_2',
  'qMuCosmLeg0Eta_5',
  'qMuCosmLeg0Eta_6',
  'qMuCosmLeg0Phi_0',
  'qMuCosmLeg0Phi_1',
  'qMuCosmLeg0Phi_2',
  'qMuCosmLeg0Phi_3',
  'qMuCosmLeg0Phi_6',
  'qMuCosmLeg0Pt_0',
  'qMuCosmLeg0Pt_1',
  'qMuCosmLeg0Pt_5',
  'qMuCosmLeg0Pt_6',
  'qMuCosmLeg1En_0',
  'qMuCosmLeg1En_1',
  'qMuCosmLeg1En_6',
  'qMuCosmLeg1Eta_0',
  'qMuCosmLeg1Eta_1',
  'qMuCosmLeg1Eta_2',
  'qMuCosmLeg1Eta_6',
  'qMuCosmLeg1Phi_0',
  'qMuCosmLeg1Phi_1',
  'qMuCosmLeg1Phi_2',
  'qMuCosmLeg1Phi_6',
  'qMuCosmLeg1Pt_0',
  'qMuCosmLeg1Pt_1',
  'qMuCosmLeg1Pt_6',
  'qMuCosmLeg2En_0',
  'qMuCosmLeg2En_1',
  'qMuCosmLeg2En_6',
  'qMuCosmLeg2Eta_0',
  'qMuCosmLeg2Eta_1',
  'qMuCosmLeg2Eta_2',
  'qMuCosmLeg2Eta_6',
  'qMuCosmLeg2Phi_0',
  'qMuCosmLeg2Phi_1',
  'qMuCosmLeg2Phi_2',
  'qMuCosmLeg2Phi_6',
  'qMuCosmLeg2Pt_0',
  'qMuCosmLeg2Pt_1',
  'qMuCosmLeg2Pt_6',
  'qMuCosmLeg3En_0',
  'qMuCosmLeg3En_1',
  'qMuCosmLeg3En_6',
  'qMuCosmLeg3Eta_0',
  'qMuCosmLeg3Eta_1',
  'qMuCosmLeg3Eta_2',
  'qMuCosmLeg3Eta_6',
  'qMuCosmLeg3Phi_0',
  'qMuCosmLeg3Phi_1',
  'qMuCosmLeg3Phi_2',
  'qMuCosmLeg3Phi_6',
  'qMuCosmLeg3Pt_0',
  'qMuCosmLeg3Pt_1',
  'qMuCosmLeg3Pt_6',
  'qMuCosmLeg4En_0',
  'qMuCosmLeg4En_1',
  'qMuCosmLeg4En_6',
  'qMuCosmLeg4Eta_0',
  'qMuCosmLeg4Eta_1',
  'qMuCosmLeg4Eta_2',
  'qMuCosmLeg4Eta_6',
  'qMuCosmLeg4Phi_0',
  'qMuCosmLeg4Phi_1',
  'qMuCosmLeg4Phi_2',
  'qMuCosmLeg4Phi_6',
  'qMuCosmLeg4Pt_0',
  'qMuCosmLeg4Pt_1',
  'qMuCosmLeg4Pt_6',
  'qMuCosmLeg5En_0',
  'qMuCosmLeg5En_1',
  'qMuCosmLeg5En_6',
  'qMuCosmLeg5Eta_0',
  'qMuCosmLeg5Eta_1',
  'qMuCosmLeg5Eta_2',
  'qMuCosmLeg5Eta_6',
  'qMuCosmLeg5Phi_0',
  'qMuCosmLeg5Phi_1',
  'qMuCosmLeg5Phi_2',
  'qMuCosmLeg5Phi_6',
  'qMuCosmLeg5Pt_0',
  'qMuCosmLeg5Pt_1',
  'qMuCosmLeg5Pt_6',
  'qMuCosmLegCh__0',
  'qMuCosmLegCh__1',
  'qMuCosmLegCh__4',
  'qMuCosmLegEn__0',
  'qMuCosmLegEn__1',
  'qMuCosmLegEn__2',
  'qMuCosmLegEn__3',
  'qMuCosmLegEn__4',
  'qMuCosmLegEn__5',
  'qMuCosmLegEn__6',
  'qMuCosmLegEta_0',
  'qMuCosmLegEta_1',
  'qMuCosmLegEta_2',
  'qMuCosmLegEta_3',
  'qMuCosmLegEta_4',
  'qMuCosmLegEta_5',
  'qMuCosmLegEta_6',
  'qMuCosmLegPhi_0',
  'qMuCosmLegPhi_1',
  'qMuCosmLegPhi_2',
  'qMuCosmLegPhi_3',
  'qMuCosmLegPhi_4',
  'qMuCosmLegPhi_5',
  'qMuCosmLegPhi_6',
  'qMuCosmLegPt_0',
  'qMuCosmLegPt_1',
  'qMuCosmLegPt_2',
  'qMuCosmLegPt_3',
  'qMuCosmLegPt_4',
  'qMuCosmLegPt_5',
  'qMuCosmLegPt_6',
  'qMuCosmPhi_0',
  'qMuCosmPhi_1',
  'qMuCosmPhi_2',
  'qMuCosmPhi_3',
  'qMuCosmPhi_4',
  'qMuCosmPhi_5',
  'qMuCosmPhi_6',
  'qMuCosmPt_0',
  'qMuCosmPt_1',
  'qMuCosmPt_2',
  'qMuCosmPt_3',
  'qMuCosmPt_4',
  'qMuCosmPt_5',
  'qMuCosmPt_6',
  'qMuEn__0',
  'qMuEn__1',
  'qMuEn__2',
  'qMuEn__3',
  'qMuEn__4',
  'qMuEn__5',
  'qMuEn__6',
  'qMuEta_0',
  'qMuEta_1',
  'qMuEta_2',
  'qMuEta_3',
  'qMuEta_4',
  'qMuEta_5',
  'qMuEta_6',
  'qMuPhi_0',
  'qMuPhi_1',
  'qMuPhi_2',
  'qMuPhi_3',
  'qMuPhi_4',
  'qMuPhi_5',
  'qMuPhi_6',
  'qMuPt_0',
  'qMuPt_1',
  'qMuPt_2',
  'qMuPt_3',
  'qMuPt_4',
  'qMuPt_5',
  'qMuPt_6'])

In [7]:
# Every column whose name begins with 'qPho'.
Pho_features = [name for name in all_features if name.startswith('qPho')]

# Show how many columns were selected, followed by the names themselves.
(len(Pho_features), Pho_features)


Out[7]:
(224,
 ['qPho0En_0',
  'qPho0En_1',
  'qPho0En_3',
  'qPho0En_4',
  'qPho0En_5',
  'qPho0En_6',
  'qPho0Eta_0',
  'qPho0Eta_1',
  'qPho0Eta_2',
  'qPho0Eta_3',
  'qPho0Eta_4',
  'qPho0Eta_5',
  'qPho0Eta_6',
  'qPho0Phi_0',
  'qPho0Phi_1',
  'qPho0Phi_2',
  'qPho0Phi_3',
  'qPho0Phi_4',
  'qPho0Phi_5',
  'qPho0Phi_6',
  'qPho0Pt_0',
  'qPho0Pt_1',
  'qPho0Pt_3',
  'qPho0Pt_4',
  'qPho0Pt_5',
  'qPho0Pt_6',
  'qPho1En_0',
  'qPho1En_1',
  'qPho1En_3',
  'qPho1En_4',
  'qPho1En_5',
  'qPho1En_6',
  'qPho1Eta_0',
  'qPho1Eta_1',
  'qPho1Eta_2',
  'qPho1Eta_3',
  'qPho1Eta_5',
  'qPho1Eta_6',
  'qPho1Phi_0',
  'qPho1Phi_1',
  'qPho1Phi_2',
  'qPho1Phi_3',
  'qPho1Phi_5',
  'qPho1Phi_6',
  'qPho1Pt_0',
  'qPho1Pt_1',
  'qPho1Pt_3',
  'qPho1Pt_4',
  'qPho1Pt_5',
  'qPho1Pt_6',
  'qPho2En_0',
  'qPho2En_1',
  'qPho2En_4',
  'qPho2En_5',
  'qPho2En_6',
  'qPho2Eta_0',
  'qPho2Eta_1',
  'qPho2Eta_2',
  'qPho2Eta_3',
  'qPho2Eta_5',
  'qPho2Eta_6',
  'qPho2Phi_0',
  'qPho2Phi_1',
  'qPho2Phi_2',
  'qPho2Phi_3',
  'qPho2Phi_5',
  'qPho2Phi_6',
  'qPho2Pt_0',
  'qPho2Pt_1',
  'qPho2Pt_4',
  'qPho2Pt_5',
  'qPho2Pt_6',
  'qPho3En_0',
  'qPho3En_1',
  'qPho3En_4',
  'qPho3En_5',
  'qPho3En_6',
  'qPho3Eta_0',
  'qPho3Eta_1',
  'qPho3Eta_2',
  'qPho3Eta_3',
  'qPho3Eta_5',
  'qPho3Eta_6',
  'qPho3Phi_0',
  'qPho3Phi_1',
  'qPho3Phi_2',
  'qPho3Phi_3',
  'qPho3Phi_5',
  'qPho3Phi_6',
  'qPho3Pt_0',
  'qPho3Pt_1',
  'qPho3Pt_4',
  'qPho3Pt_5',
  'qPho3Pt_6',
  'qPho4En_0',
  'qPho4En_1',
  'qPho4En_5',
  'qPho4En_6',
  'qPho4Eta_0',
  'qPho4Eta_1',
  'qPho4Eta_2',
  'qPho4Eta_6',
  'qPho4Phi_0',
  'qPho4Phi_1',
  'qPho4Phi_2',
  'qPho4Phi_6',
  'qPho4Pt_0',
  'qPho4Pt_1',
  'qPho4Pt_5',
  'qPho4Pt_6',
  'qPho5En_0',
  'qPho5En_1',
  'qPho5En_5',
  'qPho5En_6',
  'qPho5Eta_0',
  'qPho5Eta_1',
  'qPho5Eta_2',
  'qPho5Eta_6',
  'qPho5Phi_0',
  'qPho5Phi_1',
  'qPho5Phi_2',
  'qPho5Phi_6',
  'qPho5Pt_0',
  'qPho5Pt_1',
  'qPho5Pt_5',
  'qPho5Pt_6',
  'qPhoEn__0',
  'qPhoEn__1',
  'qPhoEn__2',
  'qPhoEn__3',
  'qPhoEn__4',
  'qPhoEn__5',
  'qPhoEn__6',
  'qPhoEta_0',
  'qPhoEta_1',
  'qPhoEta_2',
  'qPhoEta_3',
  'qPhoEta_4',
  'qPhoEta_5',
  'qPhoEta_6',
  'qPhoPhi_0',
  'qPhoPhi_1',
  'qPhoPhi_2',
  'qPhoPhi_3',
  'qPhoPhi_4',
  'qPhoPhi_5',
  'qPhoPhi_6',
  'qPhoPt_0',
  'qPhoPt_1',
  'qPhoPt_2',
  'qPhoPt_3',
  'qPhoPt_4',
  'qPhoPt_5',
  'qPhoPt_6',
  'qPhoe1x5__0',
  'qPhoe1x5__1',
  'qPhoe1x5__2',
  'qPhoe1x5__3',
  'qPhoe1x5__4',
  'qPhoe1x5__5',
  'qPhoe1x5__6',
  'qPhoe2x5__0',
  'qPhoe2x5__1',
  'qPhoe2x5__2',
  'qPhoe2x5__3',
  'qPhoe2x5__4',
  'qPhoe2x5__5',
  'qPhoe2x5__6',
  'qPhoe3x3__0',
  'qPhoe3x3__1',
  'qPhoe3x3__2',
  'qPhoe3x3__3',
  'qPhoe3x3__4',
  'qPhoe3x3__5',
  'qPhoe3x3__6',
  'qPhoe5x5__0',
  'qPhoe5x5__1',
  'qPhoe5x5__2',
  'qPhoe5x5__3',
  'qPhoe5x5__4',
  'qPhoe5x5__5',
  'qPhoe5x5__6',
  'qPhomaxenxtal__0',
  'qPhomaxenxtal__1',
  'qPhomaxenxtal__2',
  'qPhomaxenxtal__3',
  'qPhomaxenxtal__4',
  'qPhomaxenxtal__5',
  'qPhomaxenxtal__6',
  'qPhor1x5__0',
  'qPhor1x5__1',
  'qPhor1x5__2',
  'qPhor1x5__3',
  'qPhor1x5__4',
  'qPhor1x5__5',
  'qPhor1x5__6',
  'qPhor2x5__0',
  'qPhor2x5__1',
  'qPhor2x5__2',
  'qPhor2x5__3',
  'qPhor2x5__4',
  'qPhor2x5__5',
  'qPhor2x5__6',
  'qPhor9__0',
  'qPhor9__1',
  'qPhor9__2',
  'qPhor9__3',
  'qPhor9__4',
  'qPhor9__5',
  'qPhor9__6',
  'qPhosigmaIeta__0',
  'qPhosigmaIeta__1',
  'qPhosigmaIeta__2',
  'qPhosigmaIeta__3',
  'qPhosigmaIeta__4',
  'qPhosigmaIeta__5',
  'qPhosigmaIeta__6',
  'qPhosigmaeta__0',
  'qPhosigmaeta__1',
  'qPhosigmaeta__2',
  'qPhosigmaeta__3',
  'qPhosigmaeta__4',
  'qPhosigmaeta__5',
  'qPhosigmaeta__6'])

In [8]:
# Every column whose name begins with 'qCal'.
Cal_features = [name for name in all_features if name.startswith('qCal')]

# Show how many columns were selected, followed by the names themselves.
(len(Cal_features), Cal_features)


Out[8]:
(280,
 ['qCalJet0En_0',
  'qCalJet0En_1',
  'qCalJet0En_2',
  'qCalJet0En_3',
  'qCalJet0En_4',
  'qCalJet0En_5',
  'qCalJet0En_6',
  'qCalJet0Eta_0',
  'qCalJet0Eta_1',
  'qCalJet0Eta_2',
  'qCalJet0Eta_3',
  'qCalJet0Eta_4',
  'qCalJet0Eta_5',
  'qCalJet0Eta_6',
  'qCalJet0Phi_0',
  'qCalJet0Phi_1',
  'qCalJet0Phi_2',
  'qCalJet0Phi_3',
  'qCalJet0Phi_4',
  'qCalJet0Phi_5',
  'qCalJet0Phi_6',
  'qCalJet0Pt_0',
  'qCalJet0Pt_1',
  'qCalJet0Pt_2',
  'qCalJet0Pt_3',
  'qCalJet0Pt_4',
  'qCalJet0Pt_5',
  'qCalJet0Pt_6',
  'qCalJet1En_0',
  'qCalJet1En_1',
  'qCalJet1En_2',
  'qCalJet1En_3',
  'qCalJet1En_4',
  'qCalJet1En_5',
  'qCalJet1En_6',
  'qCalJet1Eta_0',
  'qCalJet1Eta_1',
  'qCalJet1Eta_2',
  'qCalJet1Eta_3',
  'qCalJet1Eta_4',
  'qCalJet1Eta_5',
  'qCalJet1Eta_6',
  'qCalJet1Phi_0',
  'qCalJet1Phi_1',
  'qCalJet1Phi_2',
  'qCalJet1Phi_3',
  'qCalJet1Phi_4',
  'qCalJet1Phi_5',
  'qCalJet1Phi_6',
  'qCalJet1Pt_0',
  'qCalJet1Pt_1',
  'qCalJet1Pt_2',
  'qCalJet1Pt_3',
  'qCalJet1Pt_4',
  'qCalJet1Pt_5',
  'qCalJet1Pt_6',
  'qCalJet2En_0',
  'qCalJet2En_1',
  'qCalJet2En_2',
  'qCalJet2En_3',
  'qCalJet2En_4',
  'qCalJet2En_5',
  'qCalJet2En_6',
  'qCalJet2Eta_0',
  'qCalJet2Eta_1',
  'qCalJet2Eta_2',
  'qCalJet2Eta_3',
  'qCalJet2Eta_4',
  'qCalJet2Eta_5',
  'qCalJet2Eta_6',
  'qCalJet2Phi_0',
  'qCalJet2Phi_1',
  'qCalJet2Phi_2',
  'qCalJet2Phi_3',
  'qCalJet2Phi_4',
  'qCalJet2Phi_5',
  'qCalJet2Phi_6',
  'qCalJet2Pt_0',
  'qCalJet2Pt_1',
  'qCalJet2Pt_2',
  'qCalJet2Pt_3',
  'qCalJet2Pt_4',
  'qCalJet2Pt_5',
  'qCalJet2Pt_6',
  'qCalJet3En_0',
  'qCalJet3En_1',
  'qCalJet3En_2',
  'qCalJet3En_3',
  'qCalJet3En_4',
  'qCalJet3En_5',
  'qCalJet3En_6',
  'qCalJet3Eta_0',
  'qCalJet3Eta_1',
  'qCalJet3Eta_2',
  'qCalJet3Eta_3',
  'qCalJet3Eta_4',
  'qCalJet3Eta_5',
  'qCalJet3Eta_6',
  'qCalJet3Phi_0',
  'qCalJet3Phi_1',
  'qCalJet3Phi_2',
  'qCalJet3Phi_3',
  'qCalJet3Phi_4',
  'qCalJet3Phi_5',
  'qCalJet3Phi_6',
  'qCalJet3Pt_0',
  'qCalJet3Pt_1',
  'qCalJet3Pt_2',
  'qCalJet3Pt_3',
  'qCalJet3Pt_4',
  'qCalJet3Pt_5',
  'qCalJet3Pt_6',
  'qCalJet4En_0',
  'qCalJet4En_1',
  'qCalJet4En_2',
  'qCalJet4En_3',
  'qCalJet4En_4',
  'qCalJet4En_5',
  'qCalJet4En_6',
  'qCalJet4Eta_0',
  'qCalJet4Eta_1',
  'qCalJet4Eta_2',
  'qCalJet4Eta_3',
  'qCalJet4Eta_4',
  'qCalJet4Eta_5',
  'qCalJet4Eta_6',
  'qCalJet4Phi_0',
  'qCalJet4Phi_1',
  'qCalJet4Phi_2',
  'qCalJet4Phi_3',
  'qCalJet4Phi_4',
  'qCalJet4Phi_5',
  'qCalJet4Phi_6',
  'qCalJet4Pt_0',
  'qCalJet4Pt_1',
  'qCalJet4Pt_2',
  'qCalJet4Pt_3',
  'qCalJet4Pt_4',
  'qCalJet4Pt_5',
  'qCalJet4Pt_6',
  'qCalJet5En_0',
  'qCalJet5En_1',
  'qCalJet5En_2',
  'qCalJet5En_3',
  'qCalJet5En_4',
  'qCalJet5En_5',
  'qCalJet5En_6',
  'qCalJet5Eta_0',
  'qCalJet5Eta_1',
  'qCalJet5Eta_2',
  'qCalJet5Eta_3',
  'qCalJet5Eta_4',
  'qCalJet5Eta_5',
  'qCalJet5Eta_6',
  'qCalJet5Phi_0',
  'qCalJet5Phi_1',
  'qCalJet5Phi_2',
  'qCalJet5Phi_3',
  'qCalJet5Phi_4',
  'qCalJet5Phi_5',
  'qCalJet5Phi_6',
  'qCalJet5Pt_0',
  'qCalJet5Pt_1',
  'qCalJet5Pt_2',
  'qCalJet5Pt_3',
  'qCalJet5Pt_4',
  'qCalJet5Pt_5',
  'qCalJet5Pt_6',
  'qCalJetEn_0',
  'qCalJetEn_1',
  'qCalJetEn_2',
  'qCalJetEn_3',
  'qCalJetEn_4',
  'qCalJetEn_5',
  'qCalJetEn_6',
  'qCalJetEta_0',
  'qCalJetEta_1',
  'qCalJetEta_2',
  'qCalJetEta_3',
  'qCalJetEta_4',
  'qCalJetEta_5',
  'qCalJetEta_6',
  'qCalJetPhi_0',
  'qCalJetPhi_1',
  'qCalJetPhi_2',
  'qCalJetPhi_3',
  'qCalJetPhi_4',
  'qCalJetPhi_5',
  'qCalJetPhi_6',
  'qCalJetPt_0',
  'qCalJetPt_1',
  'qCalJetPt_2',
  'qCalJetPt_3',
  'qCalJetPt_4',
  'qCalJetPt_5',
  'qCalJetPt_6',
  'qCalMETBEEn_0',
  'qCalMETBEEn_1',
  'qCalMETBEEn_2',
  'qCalMETBEEn_3',
  'qCalMETBEEn_4',
  'qCalMETBEEn_5',
  'qCalMETBEEn_6',
  'qCalMETBEFOEn_0',
  'qCalMETBEFOEn_1',
  'qCalMETBEFOEn_2',
  'qCalMETBEFOEn_3',
  'qCalMETBEFOEn_4',
  'qCalMETBEFOEn_5',
  'qCalMETBEFOEn_6',
  'qCalMETBEFOPhi_0',
  'qCalMETBEFOPhi_1',
  'qCalMETBEFOPhi_2',
  'qCalMETBEFOPhi_3',
  'qCalMETBEFOPhi_4',
  'qCalMETBEFOPhi_5',
  'qCalMETBEFOPhi_6',
  'qCalMETBEFOPt_0',
  'qCalMETBEFOPt_1',
  'qCalMETBEFOPt_2',
  'qCalMETBEFOPt_3',
  'qCalMETBEFOPt_4',
  'qCalMETBEFOPt_5',
  'qCalMETBEFOPt_6',
  'qCalMETBEPhi_0',
  'qCalMETBEPhi_1',
  'qCalMETBEPhi_2',
  'qCalMETBEPhi_3',
  'qCalMETBEPhi_4',
  'qCalMETBEPhi_5',
  'qCalMETBEPhi_6',
  'qCalMETBEPt_0',
  'qCalMETBEPt_1',
  'qCalMETBEPt_2',
  'qCalMETBEPt_3',
  'qCalMETBEPt_4',
  'qCalMETBEPt_5',
  'qCalMETBEPt_6',
  'qCalMETEn_0',
  'qCalMETEn_1',
  'qCalMETEn_2',
  'qCalMETEn_3',
  'qCalMETEn_4',
  'qCalMETEn_5',
  'qCalMETEn_6',
  'qCalMETMEn_0',
  'qCalMETMEn_1',
  'qCalMETMEn_2',
  'qCalMETMEn_3',
  'qCalMETMEn_4',
  'qCalMETMEn_5',
  'qCalMETMEn_6',
  'qCalMETMPhi_0',
  'qCalMETMPhi_1',
  'qCalMETMPhi_2',
  'qCalMETMPhi_3',
  'qCalMETMPhi_4',
  'qCalMETMPhi_5',
  'qCalMETMPhi_6',
  'qCalMETMPt_0',
  'qCalMETMPt_1',
  'qCalMETMPt_2',
  'qCalMETMPt_3',
  'qCalMETMPt_4',
  'qCalMETMPt_5',
  'qCalMETMPt_6',
  'qCalMETPhi_0',
  'qCalMETPhi_1',
  'qCalMETPhi_2',
  'qCalMETPhi_3',
  'qCalMETPhi_4',
  'qCalMETPhi_5',
  'qCalMETPhi_6',
  'qCalMETPt_0',
  'qCalMETPt_1',
  'qCalMETPt_2',
  'qCalMETPt_3',
  'qCalMETPt_4',
  'qCalMETPt_5',
  'qCalMETPt_6'])

In [9]:
# Every column whose name begins with 'qPF'.
# The full selection is used; an optional truncation to the first 3*7
# entries is deliberately left disabled.
PF_features = [name for name in all_features if name.startswith('qPF')]

# Show how many columns were selected, followed by the names themselves.
(len(PF_features), PF_features)


Out[9]:
(878,
 ['qPFChMetPhi_0',
  'qPFChMetPhi_1',
  'qPFChMetPhi_2',
  'qPFChMetPhi_3',
  'qPFChMetPhi_4',
  'qPFChMetPhi_5',
  'qPFChMetPhi_6',
  'qPFChMetPt_0',
  'qPFChMetPt_1',
  'qPFChMetPt_2',
  'qPFChMetPt_3',
  'qPFChMetPt_4',
  'qPFChMetPt_5',
  'qPFChMetPt_6',
  'qPFJet0Eta_0',
  'qPFJet0Eta_1',
  'qPFJet0Eta_2',
  'qPFJet0Eta_3',
  'qPFJet0Eta_4',
  'qPFJet0Eta_5',
  'qPFJet0Eta_6',
  'qPFJet0Phi_0',
  'qPFJet0Phi_1',
  'qPFJet0Phi_2',
  'qPFJet0Phi_3',
  'qPFJet0Phi_4',
  'qPFJet0Phi_5',
  'qPFJet0Phi_6',
  'qPFJet0Pt_0',
  'qPFJet0Pt_1',
  'qPFJet0Pt_2',
  'qPFJet0Pt_3',
  'qPFJet0Pt_4',
  'qPFJet0Pt_5',
  'qPFJet0Pt_6',
  'qPFJet1Eta_0',
  'qPFJet1Eta_1',
  'qPFJet1Eta_2',
  'qPFJet1Eta_3',
  'qPFJet1Eta_4',
  'qPFJet1Eta_5',
  'qPFJet1Eta_6',
  'qPFJet1Phi_0',
  'qPFJet1Phi_1',
  'qPFJet1Phi_2',
  'qPFJet1Phi_3',
  'qPFJet1Phi_4',
  'qPFJet1Phi_5',
  'qPFJet1Phi_6',
  'qPFJet1Pt_0',
  'qPFJet1Pt_1',
  'qPFJet1Pt_2',
  'qPFJet1Pt_3',
  'qPFJet1Pt_4',
  'qPFJet1Pt_5',
  'qPFJet1Pt_6',
  'qPFJet2Eta_0',
  'qPFJet2Eta_1',
  'qPFJet2Eta_2',
  'qPFJet2Eta_3',
  'qPFJet2Eta_4',
  'qPFJet2Eta_5',
  'qPFJet2Eta_6',
  'qPFJet2Phi_0',
  'qPFJet2Phi_1',
  'qPFJet2Phi_2',
  'qPFJet2Phi_3',
  'qPFJet2Phi_4',
  'qPFJet2Phi_5',
  'qPFJet2Phi_6',
  'qPFJet2Pt_0',
  'qPFJet2Pt_1',
  'qPFJet2Pt_2',
  'qPFJet2Pt_3',
  'qPFJet2Pt_4',
  'qPFJet2Pt_5',
  'qPFJet2Pt_6',
  'qPFJet3Eta_0',
  'qPFJet3Eta_1',
  'qPFJet3Eta_2',
  'qPFJet3Eta_3',
  'qPFJet3Eta_4',
  'qPFJet3Eta_5',
  'qPFJet3Eta_6',
  'qPFJet3Phi_0',
  'qPFJet3Phi_1',
  'qPFJet3Phi_2',
  'qPFJet3Phi_3',
  'qPFJet3Phi_4',
  'qPFJet3Phi_5',
  'qPFJet3Phi_6',
  'qPFJet3Pt_0',
  'qPFJet3Pt_1',
  'qPFJet3Pt_2',
  'qPFJet3Pt_3',
  'qPFJet3Pt_4',
  'qPFJet3Pt_5',
  'qPFJet3Pt_6',
  'qPFJet4CHS0Eta_0',
  'qPFJet4CHS0Eta_1',
  'qPFJet4CHS0Eta_2',
  'qPFJet4CHS0Eta_3',
  'qPFJet4CHS0Eta_4',
  'qPFJet4CHS0Eta_5',
  'qPFJet4CHS0Eta_6',
  'qPFJet4CHS0Phi_0',
  'qPFJet4CHS0Phi_1',
  'qPFJet4CHS0Phi_2',
  'qPFJet4CHS0Phi_3',
  'qPFJet4CHS0Phi_4',
  'qPFJet4CHS0Phi_5',
  'qPFJet4CHS0Phi_6',
  'qPFJet4CHS0Pt_0',
  'qPFJet4CHS0Pt_1',
  'qPFJet4CHS0Pt_2',
  'qPFJet4CHS0Pt_3',
  'qPFJet4CHS0Pt_4',
  'qPFJet4CHS0Pt_5',
  'qPFJet4CHS0Pt_6',
  'qPFJet4CHS1Eta_0',
  'qPFJet4CHS1Eta_1',
  'qPFJet4CHS1Eta_2',
  'qPFJet4CHS1Eta_3',
  'qPFJet4CHS1Eta_4',
  'qPFJet4CHS1Eta_5',
  'qPFJet4CHS1Eta_6',
  'qPFJet4CHS1Phi_0',
  'qPFJet4CHS1Phi_1',
  'qPFJet4CHS1Phi_2',
  'qPFJet4CHS1Phi_3',
  'qPFJet4CHS1Phi_4',
  'qPFJet4CHS1Phi_5',
  'qPFJet4CHS1Phi_6',
  'qPFJet4CHS1Pt_0',
  'qPFJet4CHS1Pt_1',
  'qPFJet4CHS1Pt_2',
  'qPFJet4CHS1Pt_3',
  'qPFJet4CHS1Pt_4',
  'qPFJet4CHS1Pt_5',
  'qPFJet4CHS1Pt_6',
  'qPFJet4CHS2Eta_0',
  'qPFJet4CHS2Eta_1',
  'qPFJet4CHS2Eta_2',
  'qPFJet4CHS2Eta_3',
  'qPFJet4CHS2Eta_4',
  'qPFJet4CHS2Eta_5',
  'qPFJet4CHS2Eta_6',
  'qPFJet4CHS2Phi_0',
  'qPFJet4CHS2Phi_1',
  'qPFJet4CHS2Phi_2',
  'qPFJet4CHS2Phi_3',
  'qPFJet4CHS2Phi_4',
  'qPFJet4CHS2Phi_5',
  'qPFJet4CHS2Phi_6',
  'qPFJet4CHS2Pt_0',
  'qPFJet4CHS2Pt_1',
  'qPFJet4CHS2Pt_2',
  'qPFJet4CHS2Pt_3',
  'qPFJet4CHS2Pt_4',
  'qPFJet4CHS2Pt_5',
  'qPFJet4CHS2Pt_6',
  'qPFJet4CHS3Eta_0',
  'qPFJet4CHS3Eta_1',
  'qPFJet4CHS3Eta_2',
  'qPFJet4CHS3Eta_3',
  'qPFJet4CHS3Eta_4',
  'qPFJet4CHS3Eta_5',
  'qPFJet4CHS3Eta_6',
  'qPFJet4CHS3Phi_0',
  'qPFJet4CHS3Phi_1',
  'qPFJet4CHS3Phi_2',
  'qPFJet4CHS3Phi_3',
  'qPFJet4CHS3Phi_4',
  'qPFJet4CHS3Phi_5',
  'qPFJet4CHS3Phi_6',
  'qPFJet4CHS3Pt_0',
  'qPFJet4CHS3Pt_1',
  'qPFJet4CHS3Pt_2',
  'qPFJet4CHS3Pt_3',
  'qPFJet4CHS3Pt_4',
  'qPFJet4CHS3Pt_5',
  'qPFJet4CHS3Pt_6',
  'qPFJet4CHS4Eta_0',
  'qPFJet4CHS4Eta_1',
  'qPFJet4CHS4Eta_2',
  'qPFJet4CHS4Eta_3',
  'qPFJet4CHS4Eta_4',
  'qPFJet4CHS4Eta_5',
  'qPFJet4CHS4Eta_6',
  'qPFJet4CHS4Phi_0',
  'qPFJet4CHS4Phi_1',
  'qPFJet4CHS4Phi_2',
  'qPFJet4CHS4Phi_3',
  'qPFJet4CHS4Phi_4',
  'qPFJet4CHS4Phi_5',
  'qPFJet4CHS4Phi_6',
  'qPFJet4CHS4Pt_0',
  'qPFJet4CHS4Pt_1',
  'qPFJet4CHS4Pt_2',
  'qPFJet4CHS4Pt_3',
  'qPFJet4CHS4Pt_4',
  'qPFJet4CHS4Pt_5',
  'qPFJet4CHS4Pt_6',
  'qPFJet4CHS5Eta_0',
  'qPFJet4CHS5Eta_1',
  'qPFJet4CHS5Eta_2',
  'qPFJet4CHS5Eta_3',
  'qPFJet4CHS5Eta_4',
  'qPFJet4CHS5Eta_5',
  'qPFJet4CHS5Eta_6',
  'qPFJet4CHS5Phi_0',
  'qPFJet4CHS5Phi_1',
  'qPFJet4CHS5Phi_2',
  'qPFJet4CHS5Phi_3',
  'qPFJet4CHS5Phi_4',
  'qPFJet4CHS5Phi_5',
  'qPFJet4CHS5Phi_6',
  'qPFJet4CHS5Pt_0',
  'qPFJet4CHS5Pt_1',
  'qPFJet4CHS5Pt_2',
  'qPFJet4CHS5Pt_3',
  'qPFJet4CHS5Pt_4',
  'qPFJet4CHS5Pt_5',
  'qPFJet4CHS5Pt_6',
  'qPFJet4CHSEta_0',
  'qPFJet4CHSEta_1',
  'qPFJet4CHSEta_2',
  'qPFJet4CHSEta_3',
  'qPFJet4CHSEta_4',
  'qPFJet4CHSEta_5',
  'qPFJet4CHSEta_6',
  'qPFJet4CHSPhi_0',
  'qPFJet4CHSPhi_1',
  'qPFJet4CHSPhi_2',
  'qPFJet4CHSPhi_3',
  'qPFJet4CHSPhi_4',
  'qPFJet4CHSPhi_5',
  'qPFJet4CHSPhi_6',
  'qPFJet4CHSPt_0',
  'qPFJet4CHSPt_1',
  'qPFJet4CHSPt_2',
  'qPFJet4CHSPt_3',
  'qPFJet4CHSPt_4',
  'qPFJet4CHSPt_5',
  'qPFJet4CHSPt_6',
  'qPFJet4Eta_0',
  'qPFJet4Eta_1',
  'qPFJet4Eta_2',
  'qPFJet4Eta_3',
  'qPFJet4Eta_4',
  'qPFJet4Eta_5',
  'qPFJet4Eta_6',
  'qPFJet4Phi_0',
  'qPFJet4Phi_1',
  'qPFJet4Phi_2',
  'qPFJet4Phi_3',
  'qPFJet4Phi_4',
  'qPFJet4Phi_5',
  'qPFJet4Phi_6',
  'qPFJet4Pt_0',
  'qPFJet4Pt_1',
  'qPFJet4Pt_2',
  'qPFJet4Pt_3',
  'qPFJet4Pt_4',
  'qPFJet4Pt_5',
  'qPFJet4Pt_6',
  'qPFJet5Eta_0',
  'qPFJet5Eta_1',
  'qPFJet5Eta_2',
  'qPFJet5Eta_3',
  'qPFJet5Eta_4',
  'qPFJet5Eta_5',
  'qPFJet5Eta_6',
  'qPFJet5Phi_0',
  'qPFJet5Phi_1',
  'qPFJet5Phi_2',
  'qPFJet5Phi_3',
  'qPFJet5Phi_4',
  'qPFJet5Phi_5',
  'qPFJet5Phi_6',
  'qPFJet5Pt_0',
  'qPFJet5Pt_1',
  'qPFJet5Pt_2',
  'qPFJet5Pt_3',
  'qPFJet5Pt_4',
  'qPFJet5Pt_5',
  'qPFJet5Pt_6',
  'qPFJet8CHS0Eta_0',
  'qPFJet8CHS0Eta_1',
  'qPFJet8CHS0Eta_2',
  'qPFJet8CHS0Eta_3',
  'qPFJet8CHS0Eta_4',
  'qPFJet8CHS0Eta_5',
  'qPFJet8CHS0Eta_6',
  'qPFJet8CHS0Phi_0',
  'qPFJet8CHS0Phi_1',
  'qPFJet8CHS0Phi_2',
  'qPFJet8CHS0Phi_3',
  'qPFJet8CHS0Phi_4',
  'qPFJet8CHS0Phi_5',
  'qPFJet8CHS0Phi_6',
  'qPFJet8CHS0Pt_0',
  'qPFJet8CHS0Pt_1',
  'qPFJet8CHS0Pt_2',
  'qPFJet8CHS0Pt_3',
  'qPFJet8CHS0Pt_4',
  'qPFJet8CHS0Pt_5',
  'qPFJet8CHS0Pt_6',
  'qPFJet8CHS1Eta_0',
  'qPFJet8CHS1Eta_1',
  'qPFJet8CHS1Eta_2',
  'qPFJet8CHS1Eta_3',
  'qPFJet8CHS1Eta_4',
  'qPFJet8CHS1Eta_5',
  'qPFJet8CHS1Eta_6',
  'qPFJet8CHS1Phi_0',
  'qPFJet8CHS1Phi_1',
  'qPFJet8CHS1Phi_2',
  'qPFJet8CHS1Phi_3',
  'qPFJet8CHS1Phi_4',
  'qPFJet8CHS1Phi_5',
  'qPFJet8CHS1Phi_6',
  'qPFJet8CHS1Pt_0',
  'qPFJet8CHS1Pt_1',
  'qPFJet8CHS1Pt_2',
  'qPFJet8CHS1Pt_3',
  'qPFJet8CHS1Pt_4',
  'qPFJet8CHS1Pt_5',
  'qPFJet8CHS1Pt_6',
  'qPFJet8CHS2Eta_0',
  'qPFJet8CHS2Eta_1',
  'qPFJet8CHS2Eta_2',
  'qPFJet8CHS2Eta_3',
  'qPFJet8CHS2Eta_4',
  'qPFJet8CHS2Eta_5',
  'qPFJet8CHS2Eta_6',
  'qPFJet8CHS2Phi_0',
  'qPFJet8CHS2Phi_1',
  'qPFJet8CHS2Phi_2',
  'qPFJet8CHS2Phi_3',
  'qPFJet8CHS2Phi_4',
  'qPFJet8CHS2Phi_5',
  'qPFJet8CHS2Phi_6',
  'qPFJet8CHS2Pt_0',
  'qPFJet8CHS2Pt_1',
  'qPFJet8CHS2Pt_3',
  'qPFJet8CHS2Pt_4',
  'qPFJet8CHS2Pt_5',
  'qPFJet8CHS2Pt_6',
  'qPFJet8CHS3Eta_0',
  'qPFJet8CHS3Eta_1',
  'qPFJet8CHS3Eta_2',
  'qPFJet8CHS3Eta_3',
  'qPFJet8CHS3Eta_4',
  'qPFJet8CHS3Eta_5',
  'qPFJet8CHS3Eta_6',
  'qPFJet8CHS3Phi_0',
  'qPFJet8CHS3Phi_1',
  'qPFJet8CHS3Phi_2',
  'qPFJet8CHS3Phi_3',
  'qPFJet8CHS3Phi_4',
  'qPFJet8CHS3Phi_5',
  'qPFJet8CHS3Phi_6',
  'qPFJet8CHS3Pt_0',
  'qPFJet8CHS3Pt_1',
  'qPFJet8CHS3Pt_3',
  'qPFJet8CHS3Pt_4',
  'qPFJet8CHS3Pt_5',
  'qPFJet8CHS3Pt_6',
  'qPFJet8CHS4Eta_0',
  'qPFJet8CHS4Eta_1',
  'qPFJet8CHS4Eta_2',
  'qPFJet8CHS4Eta_3',
  'qPFJet8CHS4Eta_4',
  'qPFJet8CHS4Eta_5',
  'qPFJet8CHS4Eta_6',
  'qPFJet8CHS4Phi_0',
  'qPFJet8CHS4Phi_1',
  'qPFJet8CHS4Phi_2',
  'qPFJet8CHS4Phi_3',
  'qPFJet8CHS4Phi_4',
  'qPFJet8CHS4Phi_5',
  'qPFJet8CHS4Phi_6',
  'qPFJet8CHS4Pt_0',
  'qPFJet8CHS4Pt_1',
  'qPFJet8CHS4Pt_3',
  'qPFJet8CHS4Pt_4',
  'qPFJet8CHS4Pt_5',
  'qPFJet8CHS4Pt_6',
  'qPFJet8CHS5Eta_0',
  'qPFJet8CHS5Eta_1',
  'qPFJet8CHS5Eta_2',
  'qPFJet8CHS5Eta_3',
  'qPFJet8CHS5Eta_5',
  'qPFJet8CHS5Eta_6',
  'qPFJet8CHS5Phi_0',
  'qPFJet8CHS5Phi_1',
  'qPFJet8CHS5Phi_2',
  'qPFJet8CHS5Phi_3',
  'qPFJet8CHS5Phi_5',
  'qPFJet8CHS5Phi_6',
  'qPFJet8CHS5Pt_0',
  'qPFJet8CHS5Pt_1',
  'qPFJet8CHS5Pt_3',
  'qPFJet8CHS5Pt_4',
  'qPFJet8CHS5Pt_5',
  'qPFJet8CHS5Pt_6',
  'qPFJet8CHSEta_0',
  'qPFJet8CHSEta_1',
  'qPFJet8CHSEta_2',
  'qPFJet8CHSEta_3',
  'qPFJet8CHSEta_4',
  'qPFJet8CHSEta_5',
  'qPFJet8CHSEta_6',
  'qPFJet8CHSPhi_0',
  'qPFJet8CHSPhi_1',
  'qPFJet8CHSPhi_2',
  'qPFJet8CHSPhi_3',
  'qPFJet8CHSPhi_4',
  'qPFJet8CHSPhi_5',
  'qPFJet8CHSPhi_6',
  'qPFJet8CHSPt_0',
  'qPFJet8CHSPt_1',
  'qPFJet8CHSPt_2',
  'qPFJet8CHSPt_3',
  'qPFJet8CHSPt_4',
  'qPFJet8CHSPt_5',
  'qPFJet8CHSPt_6',
  'qPFJet8CHSSD0Eta_0',
  'qPFJet8CHSSD0Eta_1',
  'qPFJet8CHSSD0Eta_2',
  'qPFJet8CHSSD0Eta_3',
  'qPFJet8CHSSD0Eta_4',
  'qPFJet8CHSSD0Eta_5',
  'qPFJet8CHSSD0Eta_6',
  'qPFJet8CHSSD0Phi_0',
  'qPFJet8CHSSD0Phi_1',
  'qPFJet8CHSSD0Phi_2',
  'qPFJet8CHSSD0Phi_3',
  'qPFJet8CHSSD0Phi_4',
  'qPFJet8CHSSD0Phi_5',
  'qPFJet8CHSSD0Phi_6',
  'qPFJet8CHSSD0Pt_0',
  'qPFJet8CHSSD0Pt_1',
  'qPFJet8CHSSD0Pt_3',
  'qPFJet8CHSSD0Pt_4',
  'qPFJet8CHSSD0Pt_5',
  'qPFJet8CHSSD0Pt_6',
  'qPFJet8CHSSD1Eta_0',
  'qPFJet8CHSSD1Eta_1',
  'qPFJet8CHSSD1Eta_2',
  'qPFJet8CHSSD1Eta_3',
  'qPFJet8CHSSD1Eta_4',
  'qPFJet8CHSSD1Eta_5',
  'qPFJet8CHSSD1Eta_6',
  'qPFJet8CHSSD1Phi_0',
  'qPFJet8CHSSD1Phi_1',
  'qPFJet8CHSSD1Phi_2',
  'qPFJet8CHSSD1Phi_3',
  'qPFJet8CHSSD1Phi_4',
  'qPFJet8CHSSD1Phi_5',
  'qPFJet8CHSSD1Phi_6',
  'qPFJet8CHSSD1Pt_0',
  'qPFJet8CHSSD1Pt_1',
  'qPFJet8CHSSD1Pt_3',
  'qPFJet8CHSSD1Pt_4',
  'qPFJet8CHSSD1Pt_5',
  'qPFJet8CHSSD1Pt_6',
  'qPFJet8CHSSD2Eta_0',
  'qPFJet8CHSSD2Eta_1',
  'qPFJet8CHSSD2Eta_2',
  'qPFJet8CHSSD2Eta_3',
  'qPFJet8CHSSD2Eta_4',
  'qPFJet8CHSSD2Eta_5',
  'qPFJet8CHSSD2Eta_6',
  'qPFJet8CHSSD2Phi_0',
  'qPFJet8CHSSD2Phi_1',
  'qPFJet8CHSSD2Phi_2',
  'qPFJet8CHSSD2Phi_3',
  'qPFJet8CHSSD2Phi_4',
  'qPFJet8CHSSD2Phi_5',
  'qPFJet8CHSSD2Phi_6',
  'qPFJet8CHSSD2Pt_0',
  'qPFJet8CHSSD2Pt_1',
  'qPFJet8CHSSD2Pt_3',
  'qPFJet8CHSSD2Pt_4',
  'qPFJet8CHSSD2Pt_5',
  'qPFJet8CHSSD2Pt_6',
  'qPFJet8CHSSD3Eta_0',
  'qPFJet8CHSSD3Eta_1',
  'qPFJet8CHSSD3Eta_2',
  'qPFJet8CHSSD3Eta_3',
  'qPFJet8CHSSD3Eta_4',
  'qPFJet8CHSSD3Eta_5',
  'qPFJet8CHSSD3Eta_6',
  'qPFJet8CHSSD3Phi_0',
  'qPFJet8CHSSD3Phi_1',
  'qPFJet8CHSSD3Phi_2',
  'qPFJet8CHSSD3Phi_3',
  'qPFJet8CHSSD3Phi_4',
  'qPFJet8CHSSD3Phi_5',
  'qPFJet8CHSSD3Phi_6',
  'qPFJet8CHSSD3Pt_0',
  'qPFJet8CHSSD3Pt_1',
  'qPFJet8CHSSD3Pt_3',
  'qPFJet8CHSSD3Pt_4',
  'qPFJet8CHSSD3Pt_5',
  'qPFJet8CHSSD3Pt_6',
  'qPFJet8CHSSD4Eta_0',
  'qPFJet8CHSSD4Eta_1',
  'qPFJet8CHSSD4Eta_2',
  'qPFJet8CHSSD4Eta_3',
  'qPFJet8CHSSD4Eta_5',
  'qPFJet8CHSSD4Eta_6',
  'qPFJet8CHSSD4Phi_0',
  'qPFJet8CHSSD4Phi_1',
  'qPFJet8CHSSD4Phi_2',
  'qPFJet8CHSSD4Phi_3',
  'qPFJet8CHSSD4Phi_5',
  'qPFJet8CHSSD4Phi_6',
  'qPFJet8CHSSD4Pt_0',
  'qPFJet8CHSSD4Pt_1',
  'qPFJet8CHSSD4Pt_4',
  'qPFJet8CHSSD4Pt_5',
  'qPFJet8CHSSD4Pt_6',
  'qPFJet8CHSSD5Eta_0',
  'qPFJet8CHSSD5Eta_1',
  'qPFJet8CHSSD5Eta_2',
  'qPFJet8CHSSD5Eta_3',
  'qPFJet8CHSSD5Eta_5',
  'qPFJet8CHSSD5Eta_6',
  'qPFJet8CHSSD5Phi_0',
  'qPFJet8CHSSD5Phi_1',
  'qPFJet8CHSSD5Phi_2',
  'qPFJet8CHSSD5Phi_3',
  'qPFJet8CHSSD5Phi_5',
  'qPFJet8CHSSD5Phi_6',
  'qPFJet8CHSSD5Pt_0',
  'qPFJet8CHSSD5Pt_1',
  'qPFJet8CHSSD5Pt_4',
  'qPFJet8CHSSD5Pt_5',
  'qPFJet8CHSSD5Pt_6',
  'qPFJet8CHSSDEta_0',
  'qPFJet8CHSSDEta_1',
  'qPFJet8CHSSDEta_2',
  'qPFJet8CHSSDEta_3',
  'qPFJet8CHSSDEta_4',
  'qPFJet8CHSSDEta_5',
  'qPFJet8CHSSDEta_6',
  'qPFJet8CHSSDPhi_0',
  'qPFJet8CHSSDPhi_1',
  'qPFJet8CHSSDPhi_2',
  'qPFJet8CHSSDPhi_3',
  'qPFJet8CHSSDPhi_4',
  'qPFJet8CHSSDPhi_5',
  'qPFJet8CHSSDPhi_6',
  'qPFJet8CHSSDPt_0',
  'qPFJet8CHSSDPt_1',
  'qPFJet8CHSSDPt_2',
  'qPFJet8CHSSDPt_3',
  'qPFJet8CHSSDPt_4',
  'qPFJet8CHSSDPt_5',
  'qPFJet8CHSSDPt_6',
  'qPFJetEI0Eta_0',
  'qPFJetEI0Eta_1',
  'qPFJetEI0Eta_2',
  'qPFJetEI0Eta_3',
  'qPFJetEI0Eta_4',
  'qPFJetEI0Eta_5',
  'qPFJetEI0Eta_6',
  'qPFJetEI0Phi_0',
  'qPFJetEI0Phi_1',
  'qPFJetEI0Phi_2',
  'qPFJetEI0Phi_3',
  'qPFJetEI0Phi_4',
  'qPFJetEI0Phi_5',
  'qPFJetEI0Phi_6',
  'qPFJetEI0Pt_0',
  'qPFJetEI0Pt_1',
  'qPFJetEI0Pt_2',
  'qPFJetEI0Pt_3',
  'qPFJetEI0Pt_4',
  'qPFJetEI0Pt_5',
  'qPFJetEI0Pt_6',
  'qPFJetEI1Eta_0',
  'qPFJetEI1Eta_1',
  'qPFJetEI1Eta_2',
  'qPFJetEI1Eta_3',
  'qPFJetEI1Eta_4',
  'qPFJetEI1Eta_5',
  'qPFJetEI1Eta_6',
  'qPFJetEI1Phi_0',
  'qPFJetEI1Phi_1',
  'qPFJetEI1Phi_2',
  'qPFJetEI1Phi_3',
  'qPFJetEI1Phi_4',
  'qPFJetEI1Phi_5',
  'qPFJetEI1Phi_6',
  'qPFJetEI1Pt_0',
  'qPFJetEI1Pt_1',
  'qPFJetEI1Pt_2',
  'qPFJetEI1Pt_3',
  'qPFJetEI1Pt_4',
  'qPFJetEI1Pt_5',
  'qPFJetEI1Pt_6',
  'qPFJetEI2Eta_0',
  'qPFJetEI2Eta_1',
  'qPFJetEI2Eta_2',
  'qPFJetEI2Eta_3',
  'qPFJetEI2Eta_4',
  'qPFJetEI2Eta_5',
  'qPFJetEI2Eta_6',
  'qPFJetEI2Phi_0',
  'qPFJetEI2Phi_1',
  'qPFJetEI2Phi_2',
  'qPFJetEI2Phi_3',
  'qPFJetEI2Phi_4',
  'qPFJetEI2Phi_5',
  'qPFJetEI2Phi_6',
  'qPFJetEI2Pt_0',
  'qPFJetEI2Pt_1',
  'qPFJetEI2Pt_2',
  'qPFJetEI2Pt_3',
  'qPFJetEI2Pt_4',
  'qPFJetEI2Pt_5',
  'qPFJetEI2Pt_6',
  'qPFJetEI3Eta_0',
  'qPFJetEI3Eta_1',
  'qPFJetEI3Eta_2',
  'qPFJetEI3Eta_3',
  'qPFJetEI3Eta_4',
  'qPFJetEI3Eta_5',
  'qPFJetEI3Eta_6',
  'qPFJetEI3Phi_0',
  'qPFJetEI3Phi_1',
  'qPFJetEI3Phi_2',
  'qPFJetEI3Phi_3',
  'qPFJetEI3Phi_4',
  'qPFJetEI3Phi_5',
  'qPFJetEI3Phi_6',
  'qPFJetEI3Pt_0',
  'qPFJetEI3Pt_1',
  'qPFJetEI3Pt_2',
  'qPFJetEI3Pt_3',
  'qPFJetEI3Pt_4',
  'qPFJetEI3Pt_5',
  'qPFJetEI3Pt_6',
  'qPFJetEI4Eta_0',
  'qPFJetEI4Eta_1',
  'qPFJetEI4Eta_2',
  'qPFJetEI4Eta_3',
  'qPFJetEI4Eta_4',
  'qPFJetEI4Eta_5',
  'qPFJetEI4Eta_6',
  'qPFJetEI4Phi_0',
  'qPFJetEI4Phi_1',
  'qPFJetEI4Phi_2',
  'qPFJetEI4Phi_3',
  'qPFJetEI4Phi_4',
  'qPFJetEI4Phi_5',
  'qPFJetEI4Phi_6',
  'qPFJetEI4Pt_0',
  'qPFJetEI4Pt_1',
  'qPFJetEI4Pt_2',
  'qPFJetEI4Pt_3',
  'qPFJetEI4Pt_4',
  'qPFJetEI4Pt_5',
  'qPFJetEI4Pt_6',
  'qPFJetEI5Eta_0',
  'qPFJetEI5Eta_1',
  'qPFJetEI5Eta_2',
  'qPFJetEI5Eta_3',
  'qPFJetEI5Eta_4',
  'qPFJetEI5Eta_5',
  'qPFJetEI5Eta_6',
  'qPFJetEI5Phi_0',
  'qPFJetEI5Phi_1',
  'qPFJetEI5Phi_2',
  'qPFJetEI5Phi_3',
  'qPFJetEI5Phi_4',
  'qPFJetEI5Phi_5',
  'qPFJetEI5Phi_6',
  'qPFJetEI5Pt_0',
  'qPFJetEI5Pt_1',
  'qPFJetEI5Pt_2',
  'qPFJetEI5Pt_3',
  'qPFJetEI5Pt_4',
  'qPFJetEI5Pt_5',
  'qPFJetEI5Pt_6',
  'qPFJetEIEta_0',
  'qPFJetEIEta_1',
  'qPFJetEIEta_2',
  'qPFJetEIEta_3',
  'qPFJetEIEta_4',
  'qPFJetEIEta_5',
  'qPFJetEIEta_6',
  'qPFJetEIPhi_0',
  'qPFJetEIPhi_1',
  'qPFJetEIPhi_2',
  'qPFJetEIPhi_3',
  'qPFJetEIPhi_4',
  'qPFJetEIPhi_5',
  'qPFJetEIPhi_6',
  'qPFJetEIPt_0',
  'qPFJetEIPt_1',
  'qPFJetEIPt_2',
  'qPFJetEIPt_3',
  'qPFJetEIPt_4',
  'qPFJetEIPt_5',
  'qPFJetEIPt_6',
  'qPFJetEta_0',
  'qPFJetEta_1',
  'qPFJetEta_2',
  'qPFJetEta_3',
  'qPFJetEta_4',
  'qPFJetEta_5',
  'qPFJetEta_6',
  'qPFJetPhi_0',
  'qPFJetPhi_1',
  'qPFJetPhi_2',
  'qPFJetPhi_3',
  'qPFJetPhi_4',
  'qPFJetPhi_5',
  'qPFJetPhi_6',
  'qPFJetPt_0',
  'qPFJetPt_1',
  'qPFJetPt_2',
  'qPFJetPt_3',
  'qPFJetPt_4',
  'qPFJetPt_5',
  'qPFJetPt_6',
  'qPFJetTopCHS0Eta_0',
  'qPFJetTopCHS0Eta_1',
  'qPFJetTopCHS0Eta_2',
  'qPFJetTopCHS0Eta_3',
  'qPFJetTopCHS0Eta_4',
  'qPFJetTopCHS0Eta_5',
  'qPFJetTopCHS0Eta_6',
  'qPFJetTopCHS0Phi_0',
  'qPFJetTopCHS0Phi_1',
  'qPFJetTopCHS0Phi_2',
  'qPFJetTopCHS0Phi_3',
  'qPFJetTopCHS0Phi_4',
  'qPFJetTopCHS0Phi_5',
  'qPFJetTopCHS0Phi_6',
  'qPFJetTopCHS0Pt_0',
  'qPFJetTopCHS0Pt_1',
  'qPFJetTopCHS0Pt_3',
  'qPFJetTopCHS0Pt_4',
  'qPFJetTopCHS0Pt_5',
  'qPFJetTopCHS0Pt_6',
  'qPFJetTopCHS1Eta_0',
  'qPFJetTopCHS1Eta_1',
  'qPFJetTopCHS1Eta_2',
  'qPFJetTopCHS1Eta_3',
  'qPFJetTopCHS1Eta_4',
  'qPFJetTopCHS1Eta_5',
  'qPFJetTopCHS1Eta_6',
  'qPFJetTopCHS1Phi_0',
  'qPFJetTopCHS1Phi_1',
  'qPFJetTopCHS1Phi_2',
  'qPFJetTopCHS1Phi_3',
  'qPFJetTopCHS1Phi_4',
  'qPFJetTopCHS1Phi_5',
  'qPFJetTopCHS1Phi_6',
  'qPFJetTopCHS1Pt_0',
  'qPFJetTopCHS1Pt_1',
  'qPFJetTopCHS1Pt_3',
  'qPFJetTopCHS1Pt_4',
  'qPFJetTopCHS1Pt_5',
  'qPFJetTopCHS1Pt_6',
  'qPFJetTopCHS2Eta_0',
  'qPFJetTopCHS2Eta_1',
  'qPFJetTopCHS2Eta_2',
  'qPFJetTopCHS2Eta_3',
  'qPFJetTopCHS2Eta_4',
  'qPFJetTopCHS2Eta_5',
  'qPFJetTopCHS2Eta_6',
  'qPFJetTopCHS2Phi_0',
  'qPFJetTopCHS2Phi_1',
  'qPFJetTopCHS2Phi_2',
  'qPFJetTopCHS2Phi_3',
  'qPFJetTopCHS2Phi_4',
  'qPFJetTopCHS2Phi_5',
  'qPFJetTopCHS2Phi_6',
  'qPFJetTopCHS2Pt_0',
  'qPFJetTopCHS2Pt_1',
  'qPFJetTopCHS2Pt_3',
  'qPFJetTopCHS2Pt_4',
  'qPFJetTopCHS2Pt_5',
  'qPFJetTopCHS2Pt_6',
  'qPFJetTopCHS3Eta_0',
  'qPFJetTopCHS3Eta_1',
  'qPFJetTopCHS3Eta_2',
  'qPFJetTopCHS3Eta_3',
  'qPFJetTopCHS3Eta_5',
  'qPFJetTopCHS3Eta_6',
  'qPFJetTopCHS3Phi_0',
  'qPFJetTopCHS3Phi_1',
  'qPFJetTopCHS3Phi_2',
  'qPFJetTopCHS3Phi_3',
  'qPFJetTopCHS3Phi_5',
  'qPFJetTopCHS3Phi_6',
  'qPFJetTopCHS3Pt_0',
  'qPFJetTopCHS3Pt_1',
  'qPFJetTopCHS3Pt_3',
  'qPFJetTopCHS3Pt_4',
  'qPFJetTopCHS3Pt_5',
  'qPFJetTopCHS3Pt_6',
  'qPFJetTopCHS4Eta_0',
  'qPFJetTopCHS4Eta_1',
  'qPFJetTopCHS4Eta_2',
  'qPFJetTopCHS4Eta_3',
  'qPFJetTopCHS4Eta_5',
  'qPFJetTopCHS4Eta_6',
  'qPFJetTopCHS4Phi_0',
  'qPFJetTopCHS4Phi_1',
  'qPFJetTopCHS4Phi_2',
  'qPFJetTopCHS4Phi_3',
  'qPFJetTopCHS4Phi_5',
  'qPFJetTopCHS4Phi_6',
  'qPFJetTopCHS4Pt_0',
  'qPFJetTopCHS4Pt_1',
  'qPFJetTopCHS4Pt_4',
  'qPFJetTopCHS4Pt_5',
  'qPFJetTopCHS4Pt_6',
  'qPFJetTopCHS5Eta_0',
  'qPFJetTopCHS5Eta_1',
  'qPFJetTopCHS5Eta_2',
  'qPFJetTopCHS5Eta_3',
  'qPFJetTopCHS5Eta_5',
  'qPFJetTopCHS5Eta_6',
  'qPFJetTopCHS5Phi_0',
  'qPFJetTopCHS5Phi_1',
  'qPFJetTopCHS5Phi_2',
  'qPFJetTopCHS5Phi_3',
  'qPFJetTopCHS5Phi_5',
  'qPFJetTopCHS5Phi_6',
  'qPFJetTopCHS5Pt_0',
  'qPFJetTopCHS5Pt_1',
  'qPFJetTopCHS5Pt_4',
  'qPFJetTopCHS5Pt_5',
  'qPFJetTopCHS5Pt_6',
  'qPFJetTopCHSEta_0',
  'qPFJetTopCHSEta_1',
  'qPFJetTopCHSEta_2',
  'qPFJetTopCHSEta_3',
  'qPFJetTopCHSEta_4',
  'qPFJetTopCHSEta_5',
  'qPFJetTopCHSEta_6',
  'qPFJetTopCHSPhi_0',
  'qPFJetTopCHSPhi_1',
  'qPFJetTopCHSPhi_2',
  'qPFJetTopCHSPhi_3',
  'qPFJetTopCHSPhi_4',
  'qPFJetTopCHSPhi_5',
  'qPFJetTopCHSPhi_6',
  'qPFJetTopCHSPt_0',
  'qPFJetTopCHSPt_1',
  'qPFJetTopCHSPt_2',
  'qPFJetTopCHSPt_3',
  'qPFJetTopCHSPt_4',
  'qPFJetTopCHSPt_5',
  'qPFJetTopCHSPt_6',
  'qPFMetPhi_0',
  'qPFMetPhi_1',
  'qPFMetPhi_2',
  'qPFMetPhi_3',
  'qPFMetPhi_4',
  'qPFMetPhi_5',
  'qPFMetPhi_6',
  'qPFMetPt_0',
  'qPFMetPt_1',
  'qPFMetPt_2',
  'qPFMetPt_3',
  'qPFMetPt_4',
  'qPFMetPt_5',
  'qPFMetPt_6'])

In [10]:
# Hold-out by row position: the last fifth of the rows is the test set,
# everything before it is the pool for training and validation.
from sklearn.model_selection import train_test_split

split_at = data_features.shape[0] - int(data_features.shape[0] / 5)
indx_test = np.arange(split_at, data_features.shape[0], dtype='int32')
indx_train = np.arange(split_at, dtype='int32')

# Carve a stratified 10% validation subset out of the training pool; the fixed
# random_state makes the split repeatable.
indx_train, indx_val = train_test_split(
    indx_train,
    test_size=0.1,
    random_state=1,
    stratify=np.array(labels.iloc[indx_train], 'float32'),
)

In [11]:
%env THEANO_FLAGS=device=cuda,gpuarray.preallocate=0.8,floatX=float32
#%env MKL_THREADING_LAYER=GNU
import numpy as np
import theano, theano.tensor as T
from lasagne.layers import *


env: THEANO_FLAGS=device=cuda,gpuarray.preallocate=0.8,floatX=float32
Can not use cuDNN on context None: cannot compile with cuDNN. We got this error:
b'/usr/bin/ld: cannot find -lcudnn\ncollect2: error: ld returned 1 exit status\n'
Preallocating 9151/11439 Mb (0.800000) on cuda
Mapped name None to device cuda: Tesla K80 (E5A6:00:00.0)

In [12]:
from sklearn.metrics import roc_curve, auc, roc_auc_score

from collections import defaultdict
from IPython import display
import time

In [13]:
# Class-balancing sample weights: every sample is weighted by
# 0.5 / (size of its own class), then scaled by the total sample count, so the
# two classes contribute equally and the weights sum to len(labels).
# NOTE(review): the plotting cells treat label 0 as "Anomalous", so the names
# num_bad (count of label 1) and num_good (count of label 0) look swapped.
# The arithmetic is unaffected - confirm and rename if so.
num_bad = np.sum(labels)
num_good = len(labels) - np.sum(labels)

own_class_size = np.where(labels == 0.0, num_good, num_bad)
weights = (0.5 / own_class_size) * len(labels)

In [14]:
# lumi_bad = np.sum(lumi[labels == 0.0])
# lumi_good = np.sum(lumi[labels == 1.0])

In [15]:
# ### By normalizing weights we implicitly define equal probabilities for each class
# weights = lumi / np.where(labels == 1.0, lumi_good, lumi_bad)
# weights *= lumi.shape[0]

In [16]:
# w_bad = np.sum(weights[labels == 0.0])
# w_good = np.sum(weights[labels == 1.0])

In [17]:
# Map every channel (one sub-network per channel) to the feature columns it
# is fed. The insertion order is the order in which the channels are iterated
# by the cells below.
channels_features = {
    'muons': Muon_features,
    'photons': Pho_features,
    'PF': PF_features,
    'calo': Cal_features,
}

In [18]:
# Number of feature columns per channel.
[(group, len(columns)) for group, columns in channels_features.items()]


Out[18]:
[('muons', 439), ('photons', 224), ('PF', 878), ('calo', 280)]

Building network


In [19]:
### For simplicity each feature group is put into its own shared variable.
# `.values` replaces `DataFrame.get_values()`, which is deprecated and has been
# removed from recent pandas releases; both return the same dense ndarray.
shareds = {}

for k, features in channels_features.items():
    shareds[k] = theano.shared(
        data_features[features].values.astype('float32'),
        name = 'X %s' % k
    )

In [20]:
# From here on `labels` is a plain float32 NumPy array (it was a pandas object
# above), so the cells below index it positionally with the index arrays.
labels = np.array(labels, dtype='float32')

In [21]:
# Labels and sample weights as Theano shared variables, so that a batch can be
# selected inside the graph by an index vector.
labels_shared = theano.shared(labels, name='labels')
weights_shared = theano.shared(weights.astype('float32'), name='weights')

In [22]:
# Symbolic int32 vector of row indices; the only data input of the compiled functions.
batch_indx = T.ivector(name='batch indx')

In [23]:
def batch_stream(X, batch_size=32):
    """Yield randomly drawn, non-overlapping mini-batches of `X`.

    The rows of `X` are shuffled with `np.random.permutation` and then cut
    into consecutive chunks of `batch_size` rows. Only full batches are
    produced: the trailing `X.shape[0] % batch_size` shuffled rows are dropped.

    Parameters
    ----------
    X : array indexable by an integer index array and exposing `.shape[0]`.
    batch_size : int, number of rows per yielded batch.

    Yields
    ------
    `X[idx]` for each batch of shuffled row positions `idx`.
    """
    n_rows = X.shape[0]
    shuffled = np.random.permutation(n_rows)
    n_full_batches = int(n_rows / batch_size)

    for start in range(0, n_full_batches * batch_size, batch_size):
        yield X[shuffled[start:start + batch_size]]

In [24]:
from lasagne import *

In [25]:
def build_network(shared, batch_indx, num_units = (50, 10), n_dropout=2, p_dropout=0.25):
    """Build one sigmoid feed-forward sub-network on top of a shared feature matrix.

    Parameters
    ----------
    shared : Theano shared variable whose value is 2-D; its second dimension
        is used as the number of input features.
    batch_indx : symbolic index vector selecting the rows of `shared` that
        form the batch.
    num_units : sizes of the hidden dense layers, in order.
    n_dropout : dropout is appended after the first `n_dropout` hidden layers only.
    p_dropout : dropout probability for those hidden-layer dropouts.

    Returns
    -------
    (net, det_prediction, train_prediction) : the output layer, the flattened
    output with `deterministic=True`, and the flattened output with
    `deterministic=False`.
    """
    feature_count = shared.get_value().shape[1]

    net = layers.InputLayer(shape=(None, feature_count), input_var=shared[batch_indx])
    # Input dropout is applied without rescaling, unlike the hidden-layer dropouts.
    net = layers.DropoutLayer(net, p=0.1, rescale=False)

    for depth, width in enumerate(num_units):
        net = layers.DenseLayer(net, num_units=width, nonlinearity=nonlinearities.sigmoid)
        if depth >= n_dropout:
            continue
        net = layers.DropoutLayer(net, p=p_dropout, rescale=True)

    # Single sigmoid unit producing the channel score.
    net = layers.DenseLayer(net, num_units=1, nonlinearity=nonlinearities.sigmoid)

    det_prediction = T.flatten(layers.get_output(net, deterministic=True))
    train_prediction = T.flatten(layers.get_output(net, deterministic=False))
    return net, det_prediction, train_prediction

In [32]:
# One sub-network per channel, each with hidden layers of 100, 50 and 10 units.
networks, det_predictions, train_predictions = {}, {}, {}

for k, shared in shareds.items():
    networks[k], det_predictions[k], train_predictions[k] = build_network(
        shared, batch_indx, num_units=(100, 50, 10), p_dropout=0.25
    )

In [33]:
# Compiled per-channel predictors taking a vector of row indices:
# deterministic output and stochastic (dropout active) output.
get_get_predictions = {
    k: theano.function([batch_indx], prediction)
    for k, prediction in det_predictions.items()
}
get_stochastic_predictions = {
    k: theano.function([batch_indx], train_predictions[k])
    for k in det_predictions
}

In [34]:
# Symbolic labels and sample weights of the rows selected by batch_indx.
labels_batch = labels_shared[batch_indx]
weights_batch = weights_shared[batch_indx]

In [35]:
from functools import reduce

In [36]:
# Regularisation term: the maximum (not the sum) over the sub-networks of each
# network's L2 penalty.
reg = reduce(T.maximum, [
    regularization.regularize_network_params(net, penalty=regularization.l2)
    for net in networks.values()
])

In [37]:
def fuzzy_and(args):
    """Soft logical AND of several scores: ``exp(sum(args) - n)``, ``n = len(args)``.

    For scores in [0, 1] (the sub-networks end in a sigmoid) the result is 1
    only when every score is 1 and shrinks exponentially with the total
    shortfall. The offset used to be the literal 4.0, which is correct only
    for exactly four inputs; it is now the number of inputs.

    Parameters
    ----------
    args : iterable of symbolic (or numeric) scores that can be added together.

    Returns
    -------
    Symbolic tensor produced by `T.exp`.
    """
    args = list(args)  # accept one-shot iterables such as dict views
    total = reduce(lambda a, b: a + b, args)
    return T.exp(total - float(len(args)))

train_global_prediction = fuzzy_and(train_predictions.values())
det_global_prediction = fuzzy_and(det_predictions.values())

In [38]:
# Symbolic scalars supplied on every call of the compiled training function.
# NOTE(review): cell In [43] later rebinds the name `c_reg` to a Python float,
# so re-running the graph-building cells after it will not see this symbol.
c_reg = T.fscalar('c reg')
learning_rate = T.fscalar('learning rate')

# Weight C of the companion losses in the modified loss; held in a shared
# variable so the training loop can lower it without recompiling.
coef_loss = theano.shared(np.array(0.7, dtype=theano.config.floatX))
# Factor coef_loss is multiplied by whenever the training loop decays it.
decay = np.array(0.8, dtype=theano.config.floatX)

In [39]:
# Sample-weighted binary cross-entropy of the ensemble ("fuzzy AND") output.
log_losses = -((1 - labels_batch) * T.log(1 - train_global_prediction) + labels_batch * T.log(train_global_prediction))

pure_loss = T.mean(weights_batch * log_losses)

# Ensemble loss plus the regularisation term scaled by c_reg.
loss = pure_loss +  c_reg * reg

# Companion losses: the same weighted cross-entropy, computed for each
# sub-network's own output against the global labels.
pure_losses = {}
for k in networks:
    log_loss = -((1 - labels_batch) * T.log(1 - train_predictions[k]) + labels_batch * T.log(train_predictions[k]))
    pure_losses[k] = T.mean(weights_batch * log_loss)

# L' = (1 - C) * L + C * mean(L_i).
# The built-in sum() is used because calling np.sum on a generator is
# deprecated in NumPy; the mean divides by the actual number of sub-networks
# instead of a literal 4.
modified_loss = (1 - coef_loss)*loss + coef_loss*sum(pure_losses[k] for k in networks)/float(len(networks))

A modified loss function is used to accelerate convergence:

$ L' = (1-C) * L + C * (L_1 + L_2 + L_3 + L_4) / 4 $,

where $L$ is the original loss (cross-entropy of the “fuzzy AND” output), $L_i$ are the 'companion' losses, and $C$ is a constant that controls the amount of “pretraining” ($C \sim 1$, $C < 1$).

Each companion loss is the cross-entropy of the corresponding subnetwork's score against the global labels, so this is similar to pretraining the separate networks on the global labels.

Every $k$ epochs the constant $C$ is decreased, so the ensemble loss $L$ gradually becomes the dominant term.


In [40]:
# Parameters of all sub-networks, concatenated in network order, are
# optimised jointly by one Adam instance against the modified loss.
params = [
    param
    for net in networks.values()
    for param in layers.get_all_params(net)
]

upd = updates.adam(modified_loss, params, learning_rate=learning_rate)

In [41]:
# One optimisation step on a batch; returns (modified loss, ensemble loss).
train = theano.function(
    inputs=[batch_indx, c_reg, learning_rate],
    outputs=[modified_loss, pure_loss],
    updates=upd,
)
# Ensemble loss on a batch, without any parameter update.
get_loss = theano.function(inputs=[batch_indx], outputs=pure_loss)

# Ensemble score: deterministic output and stochastic (dropout active) output.
get_prediction = theano.function(inputs=[batch_indx], outputs=det_global_prediction)
get_train_prediction = theano.function(inputs=[batch_indx], outputs=train_global_prediction)

In [42]:
#indx_train, indx_test = train_test_split(np.arange(data.shape[0], dtype='int32'), stratify=labels, test_size=0.1, random_state = 1)

In [43]:
n_epoches = 150
batch_size = 256
# batch_stream only yields full batches, so the per-epoch batch count is the
# floor of the ratio. The previous float ratio made the epoch-average losses
# slightly too small. max(..., 1) keeps the later division safe for a training
# set smaller than one batch.
n_batches = max(indx_train.shape[0] // batch_size, 1)
lr = 1e-3
# NOTE: from here on `c_reg` is the numeric value passed to train(); it
# replaces the symbolic scalar of the same name used to build the graph.
c_reg = 1.0e-7

# Per-epoch training history: row 0 = ensemble loss, row 1 = modified loss.
# NOTE: this rebinds `pure_losses`, which previously held the symbolic
# companion losses.
pure_losses = np.zeros(shape=(2, n_epoches), dtype='float32')
# Per-epoch validation 1-AUC: one row per sub-network plus a last row for the ensemble.
validation_losses = np.zeros(shape=(len(networks)+1, n_epoches), dtype='float32')

In [44]:
# Plot styling: white axes background, gray axes edges, solid gray grid lines.
plt.rc('axes', facecolor='white', edgecolor='gray')
plt.rc('grid', linestyle="-", color='gray')

In [45]:
# Training loop: one pass over the shuffled training indices per epoch,
# followed by validation 1-AUC per sub-network and for the ensemble, and a
# live redraw of both histories.
n_networks = len(networks)

for epoch in range(0, n_epoches):
    if epoch%130 == 129:
        #save the network's weights
        netInfo  = {}
        for net in networks:
            netInfo['network '+str(net)] = networks[net]
            netInfo['params '+str(net)] =  layers.get_all_param_values(networks[net])
        Net_FileName = 'pretraining_loss'+str(epoch)+'.pkl'
        # The context manager closes the checkpoint file even if pickling fails
        # (the handle used to be left open).
        with open(os.path.join('//data/cms2010/', Net_FileName), 'wb') as checkpoint_file:
            pickle.dump(netInfo, checkpoint_file)

    #decrease learning rate and amount of 'pretraining' loss
    if epoch%100 == 20:
        coef_loss.set_value(coef_loss.get_value() * decay)
        lr = lr*0.9

    # Accumulate both losses over the epoch's batches, then average them.
    batch_loss_m = 0.
    batch_loss_p = 0.
    for idx in batch_stream(indx_train, batch_size=batch_size):
        mod, pure = train(idx, c_reg, lr)
        batch_loss_m += mod
        batch_loss_p += pure
    pure_losses[0,epoch] = batch_loss_p/n_batches
    pure_losses[1,epoch] = batch_loss_m/n_batches

    # Validation: 1 - weighted ROC AUC for every sub-network...
    val_labels = labels[indx_val]
    val_weights = weights[indx_val]
    sum_pred_test = np.zeros((len(indx_val)))
    for k, net in enumerate(networks):
        prediction_net = get_get_predictions[net](indx_val)
        sum_pred_test += prediction_net
        validation_losses[k,epoch] = 1 - roc_auc_score(
            val_labels,
            prediction_net,
            sample_weight=val_weights)
    # ...and for the fuzzy-AND ensemble, stored in the last row. The offset is
    # the number of sub-networks (was the literal 4.), and the row index no
    # longer relies on the leaked loop variable.
    f_and = np.exp(sum_pred_test - n_networks)
    validation_losses[n_networks,epoch] = 1 - roc_auc_score(val_labels,f_and,sample_weight=val_weights)

    #plots
    display.clear_output(wait=True)
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

    axes[0].set_title("Training loss")
    axes[0].set_xlabel("#epoch")
    axes[0].set_ylabel("loss")
    for n in range(2):
        axes[0].plot(pure_losses[n][:(epoch + 1)])
    axes[0].legend(['pure_loss', 'modified_loss'], loc = 'best')

    axes[1].set_title("Val 1-auc")
    axes[1].set_xlabel("#epoch")
    axes[1].set_ylabel("1-auc")
    for n in range(n_networks + 1):
        axes[1].plot(validation_losses[n][:(epoch + 1)])
    axes[1].legend(list(networks.keys())+['f_and'], loc = 'best')
    plt.show()



In [46]:
# Index of the last epoch the training loop reached (the loop variable is still bound).
epoch


Out[46]:
149

In [47]:
# Deterministic per-channel scores on the train, validation and test subsets.
channel_names = list(networks.keys())

probas_train = {k: get_get_predictions[k](indx_train) for k in channel_names}
probas_val = {k: get_get_predictions[k](indx_val) for k in channel_names}
probas_test = {k: get_get_predictions[k](indx_test) for k in channel_names}

In [48]:
# Sum of the sub-network scores for every validation sample with label 0.
anomalous_val = np.where(labels[indx_val]==0)[0]

a = np.zeros((len(anomalous_val)))
for k in networks.keys():
    a+=probas_val[k][anomalous_val]

plt.figure(figsize=(28, 5))
plt.scatter(np.arange(len(anomalous_val)),a, c ='g')
# plt.show was referenced without being called, so the cell only echoed the
# function object; call it.
plt.show()


Out[48]:
<function matplotlib.pyplot.show>

In [49]:
# Sum of the sub-network scores for every test sample with label 0.
anomalous_test = np.where(labels[indx_test]==0)[0]

a = np.zeros((len(anomalous_test)))
for k in networks.keys():
    a+=probas_test[k][anomalous_test]

plt.figure(figsize=(28, 5))
plt.scatter(np.arange(len(anomalous_test)),a, c ='g')
# plt.show was referenced without being called, so the cell only echoed the
# function object; call it.
plt.show()


Out[49]:
<function matplotlib.pyplot.show>

In [50]:
# Mean of the summed sub-network scores in `a`, as left by the most recently run scatter cell.
np.mean(a)


Out[50]:
2.099333146606248

In [118]:
# Net_FileName = 'pretraining_loss'+str(800)+'.pkl'
# netInfoload = pickle.load(open(os.path.join('models/',Net_FileName),'rb'))
# for net in networks:
#     layers.set_all_param_values(networks[net], netInfoload['params '+str(net)])

Performance plots


In [51]:
# Weighted ROC curves on the validation subset: one per sub-network plus the
# fuzzy-AND ensemble.
plt.figure(figsize=(8, 8))
# Chance-level diagonal, drawn once (it used to be redrawn for every channel).
plt.plot([0, 1], [0, 1], '--', color='black')

roc_labels = np.array(labels, 'float32')[indx_val]
roc_weights = weights[indx_val]

sum_pred = np.zeros((len(indx_val)))
for k in networks.keys():
    common_proba = get_get_predictions[k](indx_val)
    sum_pred += common_proba

    fpr, tpr, _ = roc_curve(roc_labels, common_proba, sample_weight=roc_weights)
    # `reorder=True` is no longer passed: the argument was removed from
    # scikit-learn's auc(), and roc_curve already returns a monotonic fpr.
    auc_score = auc(fpr, tpr)
    plt.plot(fpr, tpr, label='Deterministic output, AUC for %s : %.3lf' % (k, auc_score))
    print ('Deterministic output, AUC for %s : %.3lf' % (k, auc_score))

# Ensemble score; the offset is the number of sub-networks (was the literal 4.).
f_and = np.exp(sum_pred - len(networks))

fpr, tpr, _ = roc_curve(roc_labels, f_and, sample_weight=roc_weights)
auc_score = auc(fpr, tpr)
plt.plot(fpr, tpr, label='Deterministic output, AUC fuzzy_and : %.3lf' % auc_score)


plt.legend(loc='lower right')
plt.title('ROC curve for the network', fontsize=24)
plt.xlabel('FPR', fontsize=20)
plt.ylabel('TPR', fontsize=20)
plt.show()


Deterministic output, AUC for muons : 0.827
Deterministic output, AUC for photons : 0.798
Deterministic output, AUC for PF : 0.916
Deterministic output, AUC for calo : 0.914

In [52]:
# Weighted ROC curves on the test subset: one per sub-network plus the
# fuzzy-AND ensemble.
plt.figure(figsize=(8, 8))
# Chance-level diagonal, drawn once (it used to be redrawn for every channel).
plt.plot([0, 1], [0, 1], '--', color='black')

roc_labels = np.array(labels, 'float32')[indx_test]
roc_weights = weights[indx_test]

sum_pred = np.zeros((len(indx_test)))
for k in networks.keys():
    common_proba = get_get_predictions[k](indx_test)
    sum_pred += common_proba

    fpr, tpr, _ = roc_curve(roc_labels, common_proba, sample_weight=roc_weights)
    # `reorder=True` is no longer passed: the argument was removed from
    # scikit-learn's auc(), and roc_curve already returns a monotonic fpr.
    auc_score = auc(fpr, tpr)
    plt.plot(fpr, tpr, label='Deterministic output, AUC for %s : %.3lf' % (k, auc_score))
    print ('Deterministic output, AUC for %s : %.3lf' % (k, auc_score))

# Ensemble score; the offset is the number of sub-networks (was the literal 4.).
f_and = np.exp(sum_pred - len(networks))

fpr, tpr, _ = roc_curve(roc_labels, f_and, sample_weight=roc_weights)
auc_score = auc(fpr, tpr)
plt.plot(fpr, tpr, label='Deterministic output, AUC fuzzy_and : %.3lf' % auc_score)


plt.legend(loc='lower right')
plt.title('ROC curve for the network', fontsize=24)
plt.xlabel('FPR', fontsize=20)
plt.ylabel('TPR', fontsize=20)
plt.show()


Deterministic output, AUC for muons : 0.718
Deterministic output, AUC for photons : 0.837
Deterministic output, AUC for PF : 0.952
Deterministic output, AUC for calo : 0.963

In [53]:
# Element-wise minimum of the four channel scores, per sample.
min_score_channels = ('PF', 'calo', 'muons', 'photons')

minproba_val = np.vstack([probas_val[name] for name in min_score_channels]).min(axis=0)
minproba_test = np.vstack([probas_test[name] for name in min_score_channels]).min(axis=0)

In [54]:
# Class-normalised score histograms on the validation subset: one figure per
# channel, one for the fuzzy-AND ensemble and one for the minimum score.
indx = indx_val
probas = probas_val
minproba = minproba_val

# Defined before the loop so the later figures do not depend on the loop body
# having run.
colors = ['#ed0021ff','#01017f']

# Masks and per-class normalised weights are the same for every figure.
is_anomalous = labels[indx] == 0.0
is_good = labels[indx] == 1.0
class_weights = [
    weights[indx][is_anomalous] / np.sum(weights[indx][is_anomalous]),
    weights[indx][is_good] / np.sum(weights[indx][is_good])
]

# Each panel is (scores, title, x-label, call plt.show() right after it).
sum_pred = np.zeros((len(indx)))
panels = []
for k in networks.keys():
    sum_pred += probas[k]
    panels.append((probas[k], '%s channel' % k, r'subnetwork output', False))

# Ensemble score; the offset is the number of sub-networks (was the literal 4.).
f_and = np.exp(sum_pred - len(networks))
panels.append((f_and, "Fuzzy and", r'network output', True))
panels.append((minproba, 'minproba_val', r'min subnetwork output', True))

for proba, panel_title, panel_xlabel, show_now in panels:
    plt.figure(figsize=(7, 7))
    plt.hist([
            proba[is_anomalous],
            proba[is_good]
    ],bins=20, range=(0, 1), weights=class_weights,
       histtype='step', label=['Anomalous lumisections', 'Good lumisections'], lw=3,
       alpha=1, fill= False, color=colors)
    plt.legend(loc='upper center', fontsize=20)
    plt.title(panel_title, fontsize=30, color = 'black' )
    plt.ylabel('luminosity fraction', fontsize=20, color = 'black')
    plt.xlabel(panel_xlabel, fontsize=20, color = 'black')
    plt.xticks(fontsize=20, color = 'black')
    plt.yticks(fontsize=20, color = 'black')
    plt.grid(False)
    plt.axis('on')
    if show_now:
        plt.show()



In [55]:
# Class-normalised score histograms on the test subset: one figure per
# channel, one for the fuzzy-AND ensemble and one for the minimum score.
indx = indx_test
probas = probas_test
minproba = minproba_test

# Defined before the loop so the later figures do not depend on the loop body
# having run.
colors = ['#ed0021ff','#01017f']

# Masks and per-class normalised weights are the same for every figure.
is_anomalous = labels[indx] == 0.0
is_good = labels[indx] == 1.0
class_weights = [
    weights[indx][is_anomalous] / np.sum(weights[indx][is_anomalous]),
    weights[indx][is_good] / np.sum(weights[indx][is_good])
]

# Each panel is (scores, title, x-label, call plt.show() right after it).
sum_pred = np.zeros((len(indx)))
panels = []
for k in networks.keys():
    sum_pred += probas[k]
    panels.append((probas[k], '%s channel' % k, r'subnetwork output', False))

# Ensemble score; the offset is the number of sub-networks (was the literal 4.).
f_and = np.exp(sum_pred - len(networks))
panels.append((f_and, "Fuzzy and", r'network output', True))
panels.append((minproba, 'minproba_test', r'min subnetwork output', True))

for proba, panel_title, panel_xlabel, show_now in panels:
    plt.figure(figsize=(7, 7))
    plt.hist([
            proba[is_anomalous],
            proba[is_good]
    ],bins=20, range=(0, 1), weights=class_weights,
       histtype='step', label=['Anomalous lumisections', 'Good lumisections'], lw=3,
       alpha=1, fill= False, color=colors)
    plt.legend(loc='upper center', fontsize=20)
    plt.title(panel_title, fontsize=30, color = 'black' )
    plt.ylabel('luminosity fraction', fontsize=20, color = 'black')
    plt.xlabel(panel_xlabel, fontsize=20, color = 'black')
    plt.xticks(fontsize=20, color = 'black')
    plt.yticks(fontsize=20, color = 'black')
    plt.grid(False)
    plt.axis('on')
    if show_now:
        plt.show()



In [124]:
# np.save('probas_last.npy', probas) 

# # # Load
# # read_dictionary = np.load('probas.npy').item()
# # print(read_dictionary) # displays "world"

In [56]:
# Identification / bookkeeping columns.
Ids_labels = [
    'runId', 'lumiId', 'lumi', 'isSig', 'subsystemQuality',
]
# Quality-flag columns of `sublabels` shown in the heatmaps below; 'new_json'
# comes first and is the one plotted as 'global quality' further down.
subsystemNames = [
    'new_json',
    'L1tcalo', 'L1tmu', 'Hlt',
    'Pix', 'Strip', 'Ecal', 'Hcal',
    'Dt', 'Rpc', 'Es', 'Csc',
    'Track', 'Egamma', 'Muon', 'Jetmet',
]

In [57]:
# Heatmap of the weighted ROC AUC of every channel score against every
# subsystem quality flag, on the validation subset.
metric = roc_auc_score
met_name = 'roc_auc_score'
probas = probas_val
indx = indx_val

# A list (not a dict view) so it can be measured and reused freely.
channels = list(networks.keys())
sub_systems = subsystemNames

# Slices that do not depend on the channel are taken once, outside the loops.
sub_flags = {name: np.array(sublabels[name], 'float32')[indx] for name in sub_systems}
subset_weights = weights[indx]

# Cells where the metric cannot be computed keep the chance-level value 0.5.
aucs = np.ones(shape=(len(channels), len(sub_systems))) / 2.0

for i, channel in enumerate(channels):
    for j, sub_system in enumerate(sub_systems):
        try:
            aucs[i, j] = metric(sub_flags[sub_system], probas[channel], sample_weight=subset_weights)
        except ValueError as e:
            # Raised by the metric e.g. when the flag has a single class in this
            # subset; other exception types are no longer swallowed.
            print ('%s / %s: %s' % (channel, sub_system, e))


fig = plt.figure(figsize=(36, 14))

im = plt.imshow(aucs, interpolation='None', aspect=1, cmap = 'jet')
plt.colorbar(im, shrink=0.75).ax.tick_params(labelsize=40)

# Column labels carry the number of flagged (== 0) samples in this subset.
plt.xticks(np.arange(len(sub_systems)),
           ['%s (%d) ' % (name, np.count_nonzero(sub_flags[name] == 0)) for name in sub_systems],
           rotation=90, fontsize=40, color = 'black')
plt.yticks(np.arange(len(channels)), [ "%s" % g for g in channels ], fontsize=40, color = 'black')
plt.title('ROC AUC score'+' of subnetwork scores against ground truth labels by subsystem', fontsize=50)
plt.grid(False)
plt.tight_layout()

plt.show()



In [127]:
# Heatmap of the weighted ROC AUC of every channel score against every
# subsystem quality flag, on the test subset.
metric = roc_auc_score
met_name = 'roc_auc_score'
probas = probas_test
indx = indx_test

# A list (not a dict view) so it can be measured and reused freely.
channels = list(networks.keys())
sub_systems = subsystemNames

# Slices that do not depend on the channel are taken once, outside the loops.
sub_flags = {name: np.array(sublabels[name], 'float32')[indx] for name in sub_systems}
subset_weights = weights[indx]

# Cells where the metric cannot be computed keep the chance-level value 0.5.
aucs = np.ones(shape=(len(channels), len(sub_systems))) / 2.0

for i, channel in enumerate(channels):
    for j, sub_system in enumerate(sub_systems):
        try:
            aucs[i, j] = metric(sub_flags[sub_system], probas[channel], sample_weight=subset_weights)
        except ValueError as e:
            # Raised by the metric e.g. when the flag has a single class in this
            # subset; other exception types are no longer swallowed.
            print ('%s / %s: %s' % (channel, sub_system, e))


fig = plt.figure(figsize=(36, 14))

im = plt.imshow(aucs, interpolation='None', aspect=1, cmap = 'jet')
plt.colorbar(im, shrink=0.75).ax.tick_params(labelsize=40)

# Column labels carry the number of flagged (== 0) samples in this subset.
plt.xticks(np.arange(len(sub_systems)),
           ['%s (%d) ' % (name, np.count_nonzero(sub_flags[name] == 0)) for name in sub_systems],
           rotation=90, fontsize=40, color = 'black')
plt.yticks(np.arange(len(channels)), [ "%s" % g for g in channels ], fontsize=40, color = 'black')
plt.title('ROC AUC score'+' of subnetwork scores against ground truth labels by subsystem', fontsize=50)
plt.grid(False)
plt.tight_layout()

plt.show()



In [58]:
# Heatmap of the weighted ROC AUC of every channel score against every
# subsystem quality flag, on the training subset.
metric = roc_auc_score
met_name = 'roc_auc_score'
probas = probas_train
indx = indx_train

# A list (not a dict view) so it can be measured and reused freely.
channels = list(networks.keys())
sub_systems = subsystemNames

# Slices that do not depend on the channel are taken once, outside the loops.
sub_flags = {name: np.array(sublabels[name], 'float32')[indx] for name in sub_systems}
subset_weights = weights[indx]

# Cells where the metric cannot be computed keep the chance-level value 0.5.
aucs = np.ones(shape=(len(channels), len(sub_systems))) / 2.0

for i, channel in enumerate(channels):
    for j, sub_system in enumerate(sub_systems):
        try:
            aucs[i, j] = metric(sub_flags[sub_system], probas[channel], sample_weight=subset_weights)
        except ValueError as e:
            # Raised by the metric e.g. when the flag has a single class in this
            # subset; other exception types are no longer swallowed.
            print ('%s / %s: %s' % (channel, sub_system, e))


fig = plt.figure(figsize=(36, 14))

im = plt.imshow(aucs, interpolation='None', aspect=1, cmap = 'jet')
plt.colorbar(im, shrink=0.75).ax.tick_params(labelsize=40)

# Column labels carry the number of flagged (== 0) samples in this subset.
plt.xticks(np.arange(len(sub_systems)),
           ['%s (%d) ' % (name, np.count_nonzero(sub_flags[name] == 0)) for name in sub_systems],
           rotation=90, fontsize=40, color = 'black')
plt.yticks(np.arange(len(channels)), [ "%s" % g for g in channels ], fontsize=40, color = 'black')
plt.title('ROC AUC score'+' of subnetwork scores against ground truth labels by subsystem', fontsize=50)
plt.grid(False)
plt.tight_layout()

plt.show()



In [59]:
## train subset
# For every (channel, subsystem) pair: the fraction of samples flagged bad by
# the subsystem (flag == 0) whose channel score rounds to 0.
indx = indx_train
probas = probas_train

# Defined here so the cell does not depend on an earlier plotting cell.
channels = list(networks.keys())

# Hard decision per channel, computed once with a vectorised round. It used to
# be rebuilt element by element for every subsystem.
flagged_by_channel = {channel: np.round(probas[channel]) == 0 for channel in channels}
sub_flags = {name: np.array(sublabels[name], 'float32')[indx] for name in subsystemNames}

# Columns without any bad sample keep the placeholder value 0.5.
fr = np.ones(shape=(len(channels), len(subsystemNames))) / 2.0

for j, sub_system in enumerate(subsystemNames):
    is_bad = sub_flags[sub_system] == 0
    n_bad = np.count_nonzero(is_bad)
    if n_bad == 0:
        # Was a ZeroDivisionError swallowed by a blanket except.
        print ('%s: no flagged samples in this subset, fraction left at 0.5' % sub_system)
        continue
    for i, channel in enumerate(channels):
        fr[i, j] = np.count_nonzero(flagged_by_channel[channel] & is_bad) / float(n_bad)


fig = plt.figure(figsize=(36, 14))

im = plt.imshow(fr, interpolation='None', aspect=1, cmap = 'jet')
plt.colorbar(im, shrink=0.75).ax.tick_params(labelsize=40)

# Column labels carry the number of flagged (== 0) samples in this subset.
plt.xticks(np.arange(len(subsystemNames)),
           ['%s (%d) ' % (name, np.count_nonzero(sub_flags[name] == 0)) for name in subsystemNames],
           rotation=90, fontsize=40, color = 'black')
plt.yticks(np.arange(len(channels)), [ "%s" % g for g in channels ], fontsize=40, color = 'black')
plt.title('Fraction of detected by channel anomalies', fontsize=50)
plt.grid(False)
plt.tight_layout()

plt.show()



In [60]:
## validation subset
# For every (channel, subsystem) pair: the fraction of samples flagged bad by
# the subsystem (flag == 0) whose channel score rounds to 0.
indx = indx_val
probas = probas_val

# Defined here so the cell does not depend on an earlier plotting cell.
channels = list(networks.keys())

# Hard decision per channel, computed once with a vectorised round. It used to
# be rebuilt element by element for every subsystem.
flagged_by_channel = {channel: np.round(probas[channel]) == 0 for channel in channels}
sub_flags = {name: np.array(sublabels[name], 'float32')[indx] for name in subsystemNames}

# Columns without any bad sample keep the placeholder value 0.5.
fr = np.ones(shape=(len(channels), len(subsystemNames))) / 2.0

for j, sub_system in enumerate(subsystemNames):
    is_bad = sub_flags[sub_system] == 0
    n_bad = np.count_nonzero(is_bad)
    if n_bad == 0:
        # Was a ZeroDivisionError swallowed by a blanket except.
        print ('%s: no flagged samples in this subset, fraction left at 0.5' % sub_system)
        continue
    for i, channel in enumerate(channels):
        fr[i, j] = np.count_nonzero(flagged_by_channel[channel] & is_bad) / float(n_bad)


fig = plt.figure(figsize=(36, 14))

im = plt.imshow(fr, interpolation='None', aspect=1, cmap = 'jet')
plt.colorbar(im, shrink=0.75).ax.tick_params(labelsize=40)

# Column labels carry the number of flagged (== 0) samples in this subset.
plt.xticks(np.arange(len(subsystemNames)),
           ['%s (%d) ' % (name, np.count_nonzero(sub_flags[name] == 0)) for name in subsystemNames],
           rotation=90, fontsize=40, color = 'black')
plt.yticks(np.arange(len(channels)), [ "%s" % g for g in channels ], fontsize=40, color = 'black')
plt.title('Fraction of detected by channel anomalies', fontsize=50)
plt.grid(False)
plt.tight_layout()

plt.show()



In [61]:
## test subset
# For every (channel, subsystem) pair: the fraction of samples flagged bad by
# the subsystem (flag == 0) whose channel score rounds to 0.
indx = indx_test
probas = probas_test

# Defined here so the cell does not depend on an earlier plotting cell.
channels = list(networks.keys())

# Hard decision per channel, computed once with a vectorised round. It used to
# be rebuilt element by element for every subsystem.
flagged_by_channel = {channel: np.round(probas[channel]) == 0 for channel in channels}
sub_flags = {name: np.array(sublabels[name], 'float32')[indx] for name in subsystemNames}

# Columns without any bad sample keep the placeholder value 0.5.
fr = np.ones(shape=(len(channels), len(subsystemNames))) / 2.0

for j, sub_system in enumerate(subsystemNames):
    is_bad = sub_flags[sub_system] == 0
    n_bad = np.count_nonzero(is_bad)
    if n_bad == 0:
        # Was a ZeroDivisionError swallowed by a blanket except.
        print ('%s: no flagged samples in this subset, fraction left at 0.5' % sub_system)
        continue
    for i, channel in enumerate(channels):
        fr[i, j] = np.count_nonzero(flagged_by_channel[channel] & is_bad) / float(n_bad)


fig = plt.figure(figsize=(36, 14))

im = plt.imshow(fr, interpolation='None', aspect=1, cmap = 'jet')
plt.colorbar(im, shrink=0.75).ax.tick_params(labelsize=40)

# Column labels carry the number of flagged (== 0) samples in this subset.
plt.xticks(np.arange(len(subsystemNames)),
           ['%s (%d) ' % (name, np.count_nonzero(sub_flags[name] == 0)) for name in subsystemNames],
           rotation=90, fontsize=40, color = 'black')
plt.yticks(np.arange(len(channels)), [ "%s" % g for g in channels ], fontsize=40, color = 'black')
plt.title('Fraction of detected by channel anomalies', fontsize=50)
plt.grid(False)
plt.tight_layout()

plt.show()



In [62]:
# Row-wise minimum over every subsystem flag except 'new_json', plus 'Lumi'.
flag_columns = subsystemNames[1:] + ["Lumi"]
min_subsys = np.array(sublabels[flag_columns].min(axis=1))

In [63]:
# Number of rows where the minimum subsystem flag differs from 'new_json'.
int(np.count_nonzero(min_subsys != np.array(sublabels['new_json'])))


Out[63]:
3131

In [64]:
# Fraction of rows where the minimum subsystem flag differs from 'new_json'.
# The count is computed here instead of pasting the number printed by the
# previous cell, so it cannot go stale when the data changes.
np.count_nonzero(min_subsys != np.array(sublabels['new_json'])) / float(data_features.shape[0])


Out[64]:
0.019209767470396957

In [65]:
# Overlay, row by row, the 'new_json' flag and the minimum of the subsystem flags.
row_positions = np.arange(data_features.shape[0])
overlaid_series = [
    (sublabels['new_json'], 'global quality'),
    (min_subsys, "min of the subsystems labels"),
]

fig = plt.figure(figsize=(40, 10))
for flag_values, legend_label in overlaid_series:
    plt.scatter(row_positions, flag_values, label=legend_label, s=20.)
plt.legend(fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.show()



In [66]:
# One scatter plot of the per-row quality flag for each subsystem; the name is
# printed above its figure.
# The loop variable is no longer called `sys`, which shadowed the name of the
# standard-library module for the rest of the session.
row_positions = np.arange(data_features.shape[0])
for subsystem in subsystemNames:
    fig = plt.figure(figsize=(36, 14))
    print(subsystem)
    plt.scatter(row_positions, sublabels[subsystem])
    plt.show()


new_json
L1tcalo
L1tmu
Hlt
Pix
Strip
Ecal
Hcal
Dt
Rpc
Es
Csc
Track
Egamma
Muon
Jetmet

In [ ]: