In [13]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
from cvxopt import matrix, solvers
from tools import Data, Predictions
""" A PDF reweighting tool """
__author__ = 'Stefano Carrazza & Zahari Kassabov'
__license__ = 'GPL'
__version__ = '1.0.0'
__email__ = 'stefano.carrazza@cern.ch'
In [123]:
# load data, invcovmat and predictions
dt = Data('data/data.csv.tgz', 'data/invcovmat.csv.tgz')
th = Predictions('data/predictions.csv.tgz')
# retreive cv and invcovmat as numpy arrays
cv = dt.get_data()
sigma = dt.get_invcovmat()
predictions = th.get_predictions()
In [124]:
def effective_number_of_replicas(w):
N = len(w)
w = w*N
return np.exp(np.nansum(w*np.log(N/w))/N)
In [125]:
effective_number_of_replicas(np.ones(100))
Out[125]:
In [126]:
def chi2_per_replica(sigma, cv, predictions):
diff = predictions - cv
return np.einsum('id,de,ie -> i',diff, sigma, diff )/len(sigma)
In [127]:
def nnpdf_weights(sigma, cv, predictions):
chi2s = chi2_per_replica(sigma, cv, predictions)*len(sigma)
logw = ((len(sigma) - 1)/2)*np.log(chi2s) - 0.5*chi2s
logw -= np.mean(logw)
w = np.exp(logw)
w /= sum(w)
return w
In [158]:
nw = nnpdf_weights(sigma, cv, predictions)
In [159]:
effective_number_of_replicas(nw)
Out[159]:
In [130]:
def chi2(w, sigma, cv, predictions):
w = np.ravel(w)
diff = w@predictions - cv
chi2 = diff@sigma@diff/len(diff)
return chi2
The chi² function is
v = (w@P) - cv chi² = v@sigma@v
Therefore we need to minimize 1/2*w@P@W + q@w as a function of w
In [356]:
q = -cv@sigma@predictions.T
A = np.ones_like(q)
#b = np.zeros_like(P[0])
b = float(1)
G = -np.eye(len(A))
h = np.zeros(len(A))
q = matrix(q)
A = matrix(A)
b = matrix(b)
G = matrix(G)
h = matrix(h)
mp = np.mean(predictions,axis=0)
mean_chi2 = (mp@sigma@mp)
solvers.options['feastol'] = 1e-16
mp = np.mean(predictions, axis=0)
penalty_scale = np.abs(0.5*mp@sigma@mp -cv@sigma@mp)
def get_min_weights(penalty):
eff_pelnalty = penalty*penalty_scale
P = predictions@sigma@predictions.T + np.eye(len(predictions))*eff_pelnalty
P = matrix(P)
sol = solvers.qp(P, q, G, h, A.T, b)
w = np.array(sol['x']).ravel()
return w
In [357]:
penalty_scale
Out[357]:
In [375]:
w = get_min_weights(0.01)
In [376]:
effective_number_of_replicas(w)
Out[376]:
In [377]:
(w > 0).all()
Out[377]:
In [378]:
chi2(w, sigma, cv, predictions)
Out[378]:
In [379]:
sum(w)
Out[379]:
In [380]:
%matplotlib inline
In [381]:
w.min()
Out[381]:
In [382]:
w.max()
Out[382]:
In [383]:
np.sort(w)
Out[383]:
In [384]:
np.median(w)
Out[384]:
In [385]:
np.argsort(w)[-15:]
Out[385]:
In [386]:
chi2(w, sigma, cv, predictions)
Out[386]:
In [387]:
np.mean(predictions, axis=0)@sigma@np.mean(predictions, axis=0)
Out[387]:
In [388]:
effective_number_of_replicas(w)
Out[388]:
In [ ]:
In [396]:
%matplotlib inline
import matplotlib.pyplot as plt
from ipywidgets import interact
solvers.options['show_progress'] = False
def show_sol(penalty=0):
w = get_min_weights(penalty)
plt.title("$chi^2=%.3f, N_{eff}=%.1f$" % (chi2(w, predictions=predictions,
cv=cv, sigma=sigma),
effective_number_of_replicas(w)))
w*=len(A)
plt.hist(w)
interact(show_sol, penalty=(0,0.50,0.0001))
In [397]:
#w = get_min_weights(2000)
w = nw
avg = np.average(predictions, weights=w, axis=0)
var = np.average((predictions - avg)**2, weights=w, axis=0)
rwstd = np.sqrt(var)
origstd = np.std(predictions, axis=0)
ratio = rwstd/origstd
plt.hist(ratio)
plt.title(r'$\sigma_{rw}/\sigma_0$ NNPDF')
Out[397]:
In [409]:
w = get_min_weights(0)
#w = nw
avg = np.average(predictions, weights=w, axis=0)
var = np.average((predictions - avg)**2, weights=w, axis=0)
rwstd = np.sqrt(var)
origstd = np.std(predictions, axis=0)
ratio = rwstd/origstd
plt.hist(ratio)
plt.title(r'$\sigma_{rw}/\sigma_0$ Minimization')
Out[409]:
In [407]:
w = get_min_weights(0.0107)
effective_number_of_replicas(w)
Out[407]:
In [408]:
w = get_min_weights(0.0107)
#w = nw
avg = np.average(predictions, weights=w, axis=0)
var = np.average((predictions - avg)**2, weights=w, axis=0)
rwstd = np.sqrt(var)
origstd = np.std(predictions, axis=0)
ratio = rwstd/origstd
plt.hist(ratio)
plt.title(r'$\sigma_{rw}/\sigma_0$ Minimization With WP')
Out[408]:
In [169]:
plt.plot(cv, predictions.mean(axis=0), 'o')
plt.plot(cv, w@predictions, 'o')
plt.plot(cv, cv)
plt.xscale('log')
plt.yscale('log')
Out[169]:
In [11]:
%matplotlib qt
import matplotlib.pyplot as plt
In [12]:
plt.plot(predictions.mean(axis=0)/cv, 'o')
plt.plot(w@predictions/cv, 'o')
Out[12]:
In [72]:
w
Out[72]:
In [ ]: