In [1]:
import numpy as np

import GAN.cms_datasets as cms
import GAN.plotting as plotting
import GAN.preprocessing as preprocessing
In [2]:
import GAN.utils as utils
# reload(utils)
class Parameters(utils.Parameters):
    # Notebook settings; per the note further down, each one is later exposed
    # in upper case (e.g. LOAD_DATASETS, C_NAMES, X_NAMES, MCWEIGHT).

    # Positional arguments handed to cms.load_zee.
    load_datasets = utils.param(["moriond_v9", "abs(ScEta) < 1.5"])

    # Columns used as conditional variables.
    c_names = utils.param(['Phi', 'ScEta'])
    # Columns used as target features.
    x_names = utils.param(
        ['EtaWidth', 'R9', 'SigmaIeIe', 'S4', 'PhiWidth', 'mass']
    )  # ,'Pt','mass'])

    # Disabled re-weighting options, kept for reference:
    # # reweight = utils.param('rewei_zee_barrel.npy')
    # reweight = utils.param(['rewei_zee_barrel.npy','rewei_zee_pt_barrel.npy'])#,'rewei_zee_pu_barrel.npy'])

    # Name of the MC column holding the per-event weight.
    mcweight = utils.param('weight')

    # Feature transformation passed to preprocessing.transform.
    feat_transform = utils.param('minmax')
    # feat_transform = utils.param('gaus')
class MyApp(utils.MyApp):
    # Register Parameters with the application (presumably so that its
    # settings can be overridden from the command line -- confirm in utils).
    classes = utils.List([Parameters])
# Read every parameter declared above from the command line.
# Note: the returned names are all converted to upper case.
app = MyApp()
notebook_parameters = Parameters(app).get_params()

# Expose each parameter as a global variable of the notebook.
globals().update(notebook_parameters)

# Display the resolved configuration.
notebook_parameters
Out[2]:
In [3]:
# reload(cms)
# Load the two samples; the entries of LOAD_DATASETS are unpacked as
# positional arguments (presumably a dataset tag and a selection string).
data,mc = cms.load_zee(*LOAD_DATASETS)
In [4]:
# Show the columns available in the data sample.
data.columns
Out[4]:
In [5]:
# Show the columns available in the MC sample.
mc.columns
Out[5]:
In [6]:
# Split both samples into conditional (c) and feature (x) column subsets.
c_names, x_names = C_NAMES, X_NAMES

data_c, data_x = data[c_names], data[x_names]
mc_c, mc_x = mc[c_names], mc[x_names]
In [7]:
# Show column names and shapes of the data feature and conditional frames.
data_x.columns, data_x.shape, data_c.columns, data_c.shape
Out[7]:
In [8]:
# Show column names of the data feature and conditional frames.
data_x.columns, data_c.columns
Out[8]:
In [9]:
# Show column names of the MC feature and conditional frames.
mc_x.columns, mc_c.columns
Out[9]:
In [15]:
# # reload(preprocessing)
# if MCWEIGHT is None:
# mc_w = np.ones(mc_x.shape[0])
# else:
# mc_w = mc[MCWEIGHT].values
# if not REWEIGHT is None:
# for fil in REWEIGHT:
# info = np.load(fil)
# inputs = info[0]
# weights = info[1]
# bins = info[2:]
# # print(bins[1])
# print('weighting',inputs)
# mc_w *= preprocessing.reweight(mc,inputs,bins,weights,base=None)
# data_w = np.ones(data_x.shape[0])
In [16]:
# Per-feature summary statistics of the data, used below to define the
# outlier-rejection bounds.
data_min, data_max = data_x.min(), data_x.max()
q5, q95 = data_x.quantile(0.05), data_x.quantile(0.95)

# Interquartile range: 75% quantile minus 25% quantile.
iqr = data_x.quantile(0.75) - data_x.quantile(0.25)
In [17]:
# Per-feature minimum of the data.
data_min
Out[17]:
In [18]:
# Per-feature 5% quantile of the data.
q5
Out[18]:
In [19]:
# 5% quantile lowered by one interquartile range.
q5 - iqr
Out[19]:
In [20]:
# Distance from the minimum to the 5% quantile, in units of the IQR.
(q5 - data_min)/iqr
Out[20]:
In [21]:
# Per-feature maximum of the data.
data_max
Out[21]:
In [22]:
# Per-feature 95% quantile of the data.
q95
Out[22]:
In [23]:
# 95% quantile raised by one interquartile range.
q95 + iqr
Out[23]:
In [24]:
# Distance from the 95% quantile to the maximum, in units of the IQR.
(data_max - q95)/iqr
Out[24]:
In [25]:
# Number of interquartile ranges beyond the 5% / 95% quantiles that a value
# may lie before it is treated as an outlier.
thr = 1.5
In [26]:
# Element-wise outlier flags: below (q5 - thr*iqr) or above (q95 + thr*iqr),
# evaluated separately for every feature column.
reject_low = data_x.lt(q5 - thr * iqr)
reject_high = data_x.gt(q95 + thr * iqr)
In [27]:
# Count the events with at least one feature flagged on the low / high side.
n_reject_low, n_reject_high = (
    flags.any(axis=1).sum() for flags in (reject_low, reject_high)
)
In [28]:
# Fraction of data events rejected on the low and on the high side.
# The denominator is cast to float so that the ratio is a true division even
# where `/` on two integers truncates (Python 2); this matches the acceptance
# fractions computed further down, which already use float(...).
n_reject_low/float(data_x.shape[0]), n_reject_high/float(data_x.shape[0])
Out[28]:
In [29]:
def _within_bounds(frame):
    # True for rows whose every feature lies inside the closed interval
    # [q5 - thr*iqr, q95 + thr*iqr]; the bounds always come from the data.
    lower, upper = q5 - thr * iqr, q95 + thr * iqr
    inside = frame.ge(lower) & frame.le(upper)
    return inside.all(axis=1)


# The same data-derived bounds are applied to both samples.
accept_data = _within_bounds(data_x)
accept_mc = _within_bounds(mc_x)
In [39]:
# Fraction of data events passing the cleaning (mean of a boolean mask).
accept_data.mean()
Out[39]:
In [40]:
# Fraction of MC events passing the cleaning (mean of a boolean mask).
accept_mc.mean()
Out[40]:
In [41]:
# Summary statistics of the data features after the cleaning selection.
data_x[accept_data].describe()
Out[41]:
In [42]:
# Summary statistics of the MC features after the cleaning selection.
mc_x[accept_mc].describe()
Out[42]:
In [43]:
# Per-feature upper / lower cleaning thresholds, as new Series objects.
thr_up, thr_down = q95 + thr * iqr, q5 - thr * iqr
In [44]:
# Upper cleaning threshold per feature.
thr_up
Out[44]:
In [45]:
# Lower cleaning threshold per feature.
thr_down
Out[45]:
In [46]:
# Replace the quantile-based bounds on 'mass' with a fixed 70-110 window.
# NOTE(review): accept_data / accept_mc were computed in an earlier cell from
# the quantile-based bounds, so this override only changes the thresholds
# written to file -- confirm that is intended.
thr_down['mass'], thr_up['mass'] = 70., 110.
# thr_down['Pt'] = 25.
# thr_up['Pt'] = 100.
In [47]:
# Persist both threshold Series in one HDF5 file: the first call creates or
# overwrites the file (mode='w'), the second appends to it (mode='a').
# `key` is passed by keyword because recent pandas versions deprecate
# positional arguments to to_hdf other than the path.
thr_up.to_hdf('cleaning_zee_m_barrel.hd5', key='thr_up', mode='w')
thr_down.to_hdf('cleaning_zee_m_barrel.hd5', key='thr_down', mode='a')
In [ ]:
In [48]:
# Per-event weights. They used to be created only in a cell that is now fully
# commented out, so on a fresh kernel this cell failed with a NameError.
# Data events get unit weight; MC events take the MCWEIGHT column when one is
# configured, and unit weight otherwise.
data_w = np.ones(data_x.shape[0])
mc_w = np.ones(mc_x.shape[0]) if MCWEIGHT is None else mc[MCWEIGHT].values

# Apply the cleaning masks to features, conditionals and weights alike, so
# that all three stay aligned row by row. The data features were previously
# left unfiltered while data_c / data_w were filtered.
data_x = data_x[accept_data]
mc_x = mc_x[accept_mc]
data_c = data_c[accept_data]
mc_c = mc_c[accept_mc]
data_w = data_w[accept_data.values]
mc_w = mc_w[accept_mc.values]

# Transform features and conditionals with the configured method; the names
# are rebound to the transformed outputs, so this cell is not re-runnable
# without first re-running the cells that build the inputs.
data_x,data_c,mc_x,mc_c,scaler_x,scaler_c = preprocessing.transform(data_x,data_c,mc_x,mc_c,FEAT_TRANSFORM)
In [49]:
# Overlay data and MC histograms, one figure per variable: first all
# features, then all conditionals.
# NOTE(review): `plt` is not imported in any cell visible here -- confirm it
# is provided elsewhere (e.g. by the kernel configuration).
for names, data_arr, mc_arr in ((x_names, data_x, mc_x), (c_names, data_c, mc_c)):
    for idx, label in enumerate(names):
        # An explicit range (e.g. range=[-3,3]) was tried and left disabled.
        plotting.plot_hists(data_arr[:, 0, idx], mc_arr[:, 0, idx], bins=100)
        plt.xlabel(label)
        plt.show()
In [ ]: