In [ ]:
%matplotlib inline
In [ ]:
from __future__ import print_function
import os
import sys
import time
sys.path.append('/afs/crc.nd.edu/user/a/awoodard/releases/np-fit-production/CMSSW_7_4_7/python')
sys.path.append('/afs/crc.nd.edu/user/a/awoodard/.local/lib/python2.7/site-packages/')
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import matplotlib.ticker as ticker
In [ ]:
from NPFitProduction.NPFitProduction.cross_sections import parse_lhe_weights, CrossSectionScan
In [ ]:
%load_ext autoreload
%autoreload 2
In [ ]:
tweaks = {
"legend.fontsize": "x-large",
"legend.edgecolor": "white",
"legend.facecolor": "white",
"axes.labelsize": "x-large",
"axes.titlesize": "x-large",
"xtick.labelsize": "large",
"xtick.major.size": 5,
"ytick.major.size": 5,
"ytick.labelsize": "large",
}
plt.rc(tweaks)
In [28]:
tag = 'reweight_v9'
tag = 'reweight_v10'
tag = 'reweight_v16'
coefficients = ['cuB', 'cuG']
tag = 'reweight_v17'
coefficients = ['cuB', 'cuG', 'c3G']
tag = 'reweight_v19'
coefficients = ['cuB', 'cuG', 'c3G', 'cHu']
tag = 'reweight_v21'
coefficients = ['cHu', 'cuB', 'cuG', 'c3G', 'tc3G']
tag = 'reweight_v22'
coefficients = ['cuB', 'cuG', 'c3G', 'cHu', 'tc3G', 'cH']
tag = 'reweight_v24'
coefficients = ['cuB', 'cH', 'tc3G', 'c3G', 'cHu', 'c2G', 'cuG']
tag = 'reweight_v15'
tag = 'reweight_v14'
coefficients = ['cuB']
tag = 'reweight_v25'
tag = 'reweight_v20'
coefficients = ['cuW', 'cuB', 'cH', 'tc3G', 'c3G', 'cHu', 'c2G', 'cuG']
outdir = '/afs/crc.nd.edu/user/a/awoodard/www/.private/ttV/' + tag
lhe = '/afs/crc.nd.edu/user/a/awoodard/np-fit-production/test/{}/processtmp/Events/run_01/unweighted_events.lhe'.format(tag)
In [29]:
indices = dict((c, i) for i, c in enumerate(coefficients))
if not os.path.isdir(outdir):
os.makedirs(outdir)
In [30]:
scan = CrossSectionScan("/afs/crc.nd.edu/user/a/awoodard/np-fit-production/data/8d.npz")
weights
: each row corresponds to an event, each column corresponds to a point
points
: each row corresponds to a point, each column corresponds to a coefficient
In [31]:
start_values, points, weights = parse_lhe_weights(lhe, coefficients)
In [24]:
test = [i for i in range(1, len(points)) if i % 10 == 0]
train = [i for i in range(1, len(points)) if i % 10 != 0]
errs = np.zeros((len(weights), len(points)))
fits = np.zeros((len(weights), len(points)))
start = time.time()
for event in range(len(weights)):
constants, _, _, _ = np.linalg.lstsq(scan.model(points[train]), weights[event][train])
fit = np.dot(scan.model(points), constants)
mg = weights[event]
errs[event] = (mg - fit) / mg * 100
fits[event] = fit
print('finished {} {} dimensional fits to {} points in {} seconds'.format(
len(weights),
points.shape[1],
len(points[train]),
int(time.time() - start))
)
In [25]:
plt.hist(errs[:,test].ravel(), 50, histtype='step', log=True, fill=False)
plt.xlabel('$(\mathrm{weight}_{\mathrm{MG}} - \mathrm{weight}_{\mathrm{fit}}) / \mathrm{weight}_{\mathrm{MG}} * 100$')
plt.ylabel('event points')
plt.savefig(os.path.join(outdir, 'errs.pdf'), bbox_inches='tight')
In [26]:
x = np.concatenate([points[:, indices['cuB']] for i in weights]).ravel()
y = weights.ravel()
bins = [
np.linspace(x.min(), x.max(), 20),
np.logspace(np.log10(y.min()), np.log10(y.max()), 20)
]
plt.yscale('log')
H, xedges, yedges = np.histogram2d(x, y, bins=bins)
plt.pcolormesh(xedges, yedges, H.T, norm=LogNorm())
plt.xlabel('$\mathrm{c}_\mathrm{uB}$')
plt.ylabel('weight')
plt.colorbar(label='event points')
plt.savefig(os.path.join(outdir, 'cuB_vs_weight.pdf'), bbox_inches='tight')
Above is the projection from the 8-dimensional space onto the cuB axis.
In [27]:
x = np.concatenate([points[test][:, indices['cuB']] for i in errs[:, test]]).ravel()
y = np.abs(errs[:, test]).ravel()
bins = [
np.linspace(x.min(), x.max(), 20),
np.logspace(np.log10(y.min()), np.log10(y.max()), 20)
]
print(bins)
plt.yscale('log')
H, xedges, yedges = np.histogram2d(x, y, bins=bins)
plt.pcolormesh(xedges, yedges, H.T, norm=LogNorm())
plt.xlabel('$\mathrm{c}_\mathrm{uB}$')
plt.ylabel('$(\mathrm{weight}_{\mathrm{MG}} - \mathrm{weight}_{\mathrm{fit}}) / \mathrm{weight}_{\mathrm{MG}} * 100$')
plt.colorbar(label='event points')
plt.savefig(os.path.join(outdir, 'cuB_vs_err.pdf'), bbox_inches='tight')
Note that the plot above only uses test points which were not included in the fit. The blank vertical bars correspond to the train
points which were included in the fit.
In [14]:
x = weights[:, test].ravel()
y = np.abs(errs[:, test].ravel())
bins = [
np.logspace(np.log10(x.min()), np.log10(x.max()), 40),
np.logspace(np.log10(y.min()), np.log10(y.max()), 40)
]
with plt.rc_context(tweaks):
plt.xscale('log')
plt.yscale('log')
H, xedges, yedges = np.histogram2d(x, y, bins=bins)
plt.pcolormesh(xedges, yedges, H.T, norm=LogNorm())
plt.xlabel('weight')
plt.ylabel('100 $\\frac{\mathrm{weight}_{\mathrm{MG}} - \mathrm{weight}_{\mathrm{fit}}}{\mathrm{weight}_{\mathrm{MG}}}$', fontsize='x-large')
plt.colorbar(label='event points')
#plt.title('{}d parameterization'.format(len(coefficients)))
plt.savefig(os.path.join(outdir, 'weight_vs_err.pdf'), bbox_inches='tight')
The only thing more confusing than a log log plot is a log log log plot.
In [32]:
with plt.rc_context(tweaks):
bins = np.logspace(np.log10(fits.min()), np.log10(fits.max()), 100)
plt.xscale('log')
plt.yscale('log')
plt.hist(fits.ravel(), bins=bins, histtype='step', fill=False, log=True, lw=3, color='red')
plt.hist(weights.ravel(), bins=bins, histtype='step', fill=False, log=True, color='blue')
fit_label, = plt.plot([0, 0], color='red') # hack to show lines instead of boxes in the legend
mg_label, = plt.plot([0, 0], color='blue')
plt.xlabel('weight')
plt.ylabel('event points')
plt.legend([fit_label, mg_label], ['fit', 'MG'], loc='upper right')
plt.savefig(os.path.join(outdir, 'fit_and_mg.pdf'), bbox_inches='tight')
In [16]:
bins = np.logspace(np.log10(fits.min()), np.log10(fits.max()), 100)
plt.xscale('log')
plt.yscale('log')
plt.hist(fits[:, test].ravel(), bins=bins, histtype='step', fill=False, log=True, lw=3, color='red')
plt.hist(weights[:, test].ravel(), bins=bins, histtype='step', fill=False, log=True, color='blue')
fit_label, = plt.plot([0, 0], color='red') # hack to show lines instead of boxes in the legend
mg_label, = plt.plot([0, 0], color='blue')
plt.xlabel('weight')
plt.ylabel('event points')
plt.legend([fit_label, mg_label], ['fit', 'MG'], loc='upper right')
plt.savefig(os.path.join(outdir, 'fit_and_mg_only_test_points.pdf'), bbox_inches='tight')
In [17]:
scan.fit()
scan.scales(('c2G', 'c3G', 'cH', 'cHu', 'cuB', 'cuG', 'cuW', 'tc3G'), 'ttZ')
sm = scan.cross_sections['sm']['ttZ']
scales = [sum(weights[:, i]) / sm for i in range(weights.shape[1])]
x = np.concatenate([scales for i in weights]).ravel()
y = weights.ravel()
bins = [
np.linspace(x.min(), x.max(), 30),
np.logspace(np.log10(y.min()), np.log10(y.max()), 30)
]
with plt.rc_context(tweaks):
plt.yscale('log')
H, xedges, yedges = np.histogram2d(x, y, bins=bins)
plt.pcolormesh(xedges, yedges, H.T, norm=LogNorm())
plt.xlabel('$\sigma_\mathrm{NP+SM}/\sigma_\mathrm{SM}$')
plt.ylabel('weight')
plt.colorbar(label='event points')
#plt.xlim(xmax=12)
plt.savefig(os.path.join(outdir, 'weight_vs_scale.pdf'), bbox_inches='tight')
In the above plot, all of the weights corresponding to each point are added to obtain the total cross section at that point. This plot only means anything if the reweighting makes sense, which remains to be seen.
In [18]:
plt.hist(weights.ravel(), 150, histtype='step', log=True, fill=False)
plt.xlabel('weight')
plt.ylabel('event points')
plt.savefig(os.path.join(outdir, 'errs.pdf'), bbox_inches='tight')
print("weights vary between {:.0e} and {:.0e}".format(weights.min(), weights.max()))
In [19]:
unreweighted = scan.evaluate(coefficients, points, 'ttZ')
reweighted = np.vstack([sum(weights[:, i]) / sm for i in range(weights.shape[1])])
try:
start_values = start_values.reshape((1, len(start_values)))
except:
pass
plt.plot(points[:, indices['cuB']].ravel(), unreweighted.ravel(), 'o', linestyle='none', label='unreweighted')
plt.plot(points[:, indices['cuB']].ravel(), reweighted.ravel(), '+', linestyle='none', label='reweighted')
plt.plot(start_values[:, indices['cuB']], scan.evaluate(coefficients, start_values, 'ttZ'), marker='*', markersize=10, linestyle='none', color='red', label='reference model')
plt.ylabel('$\sigma_\mathrm{NP+SM}/\sigma_\mathrm{SM}$')
plt.xlabel('cuB')
plt.legend(loc='upper center')
plt.savefig(os.path.join(outdir, 'unreweighted_and_weighted.pdf'), bbox_inches='tight')
In [20]:
unreweighted = scan.evaluate(coefficients, points, 'ttZ').ravel()
reweighted = np.array([sum(weights[:, i]) / sm for i in range(weights.shape[1])])
reweighted_parameterized = np.array([sum(fits[:, i]) / sm for i in range(weights.shape[1])])
sort = unreweighted.argsort()
index = np.array(list(range(len(sort))))
plt.plot(index, reweighted_parameterized[sort], 'o', linestyle='none', markersize=10, label='reweighted (paramaterized)', color='orange')
plt.plot(index, reweighted[sort], '+', linestyle='none', markersize=10, label='reweighted')
plt.plot(index, unreweighted[sort], '.', linestyle='none', markersize=10, label='unreweighted', color='red')
plt.ylabel('$\sigma_\mathrm{NP+SM}/\sigma_\mathrm{SM}$')
plt.xlabel('point')
plt.legend(loc='upper left', title='{}d scan (reference $\sigma_\mathrm{{NP+SM}}/\sigma_\mathrm{{SM}}={:.1f}$)'.format(points.shape[1], scan.evaluate(coefficients, start_values, 'ttZ')[0]))
plt.savefig(os.path.join(outdir, 'unreweighted_and_weighted_by_point.pdf'), bbox_inches='tight')
print(outdir)
In [ ]:
In [ ]:
In [ ]: