In [13]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np

from cvxopt import matrix, solvers

from tools import Data, Predictions


""" A PDF reweighting tool """

__author__ = 'Stefano Carrazza & Zahari Kassabov'
__license__ = 'GPL'
__version__ = '1.0.0'
__email__ = 'stefano.carrazza@cern.ch'

In [123]:
# load data, invcovmat and predictions
dt = Data('data/data.csv.tgz', 'data/invcovmat.csv.tgz')
th = Predictions('data/predictions.csv.tgz')

# retreive cv and invcovmat as numpy arrays
cv    = dt.get_data()
sigma = dt.get_invcovmat()
predictions     = th.get_predictions()


 Opening data file: data/data.csv.tgz
 Opening data file: data/invcovmat.csv.tgz
 Opening predictions file: data/predictions.csv.tgz

In [124]:
def effective_number_of_replicas(w):
    N = len(w)
    w = w*N
    return np.exp(np.nansum(w*np.log(N/w))/N)

In [125]:
effective_number_of_replicas(np.ones(100))


Out[125]:
1.0

In [126]:
def chi2_per_replica(sigma, cv, predictions):
    diff = predictions - cv
    return np.einsum('id,de,ie -> i',diff, sigma, diff )/len(sigma)

In [127]:
def nnpdf_weights(sigma, cv, predictions):
    chi2s = chi2_per_replica(sigma, cv, predictions)*len(sigma)
    logw = ((len(sigma) - 1)/2)*np.log(chi2s) - 0.5*chi2s
    logw -= np.mean(logw)
    w = np.exp(logw)
    w /= sum(w)
    return w

In [158]:
nw = nnpdf_weights(sigma, cv, predictions)

In [159]:
effective_number_of_replicas(nw)


Out[159]:
2.4194382277024666

In [130]:
def chi2(w, sigma, cv, predictions):

    w = np.ravel(w)
    diff = w@predictions - cv

    chi2 = diff@sigma@diff/len(diff)

    return chi2

The chi² function is

v = (w@P) - cv chi² = v@sigma@v

Therefore we need to minimize 1/2*w@P@W + q@w as a function of w


In [356]:
q = -cv@sigma@predictions.T
A = np.ones_like(q)

#b = np.zeros_like(P[0])
b = float(1)

G = -np.eye(len(A))

h = np.zeros(len(A))

q = matrix(q)
A = matrix(A)
b = matrix(b)
G = matrix(G)
h = matrix(h)

mp = np.mean(predictions,axis=0)

mean_chi2 = (mp@sigma@mp)

solvers.options['feastol'] = 1e-16

mp = np.mean(predictions, axis=0)
penalty_scale = np.abs(0.5*mp@sigma@mp -cv@sigma@mp)

def get_min_weights(penalty):
    
    eff_pelnalty = penalty*penalty_scale
    P = predictions@sigma@predictions.T + np.eye(len(predictions))*eff_pelnalty



    P = matrix(P)


    
    sol = solvers.qp(P, q, G, h, A.T, b)

    w = np.array(sol['x']).ravel()
    return w

In [357]:
penalty_scale


Out[357]:
171045.6639095734

In [375]:
w = get_min_weights(0.01)

In [376]:
effective_number_of_replicas(w)


Out[376]:
33.693561768259315

In [377]:
(w > 0).all()


Out[377]:
True

In [378]:
chi2(w, sigma, cv, predictions)


Out[378]:
1.1310003801055741

In [379]:
sum(w)


Out[379]:
1.0000000000000002

In [380]:
%matplotlib inline

In [381]:
w.min()


Out[381]:
7.255668463804438e-189

In [382]:
w.max()


Out[382]:
0.066157585740448596

In [383]:
np.sort(w)


Out[383]:
array([  7.25566846e-189,   9.13086153e-189,   1.12347234e-188,
         1.28999188e-188,   1.46315910e-188,   1.50938405e-188,
         1.60646004e-188,   1.67045113e-188,   1.79025267e-188,
         1.83950111e-188,   1.93991622e-188,   2.10289831e-188,
         2.24996434e-188,   2.30446970e-188,   2.32396387e-188,
         2.86612092e-188,   3.17002438e-188,   3.44292598e-188,
         3.81072580e-188,   3.86574347e-188,   3.88370621e-188,
         4.26474418e-188,   4.39330768e-188,   5.33845447e-188,
         5.43072660e-188,   6.44251603e-188,   6.91984065e-188,
         1.30938844e-187,   2.19843156e-187,   2.65930496e-187,
         3.67353696e-187,   7.34105903e-187,   2.64905755e-186,
         1.01632923e-182,   9.47453710e-004,   1.16668688e-003,
         1.20650633e-003,   1.31217977e-003,   1.45996398e-003,
         1.64396353e-003,   2.77495093e-003,   3.19706817e-003,
         3.24882230e-003,   5.47278455e-003,   7.90152052e-003,
         8.67676367e-003,   9.32259423e-003,   9.50299808e-003,
         1.05182380e-002,   1.09780794e-002,   1.12247699e-002,
         1.29294749e-002,   1.30581498e-002,   1.63931362e-002,
         1.71848718e-002,   1.73048428e-002,   1.84731935e-002,
         2.21722407e-002,   2.29839861e-002,   2.31693274e-002,
         2.37657325e-002,   2.42110920e-002,   2.46686422e-002,
         2.48750674e-002,   2.54919577e-002,   2.70102961e-002,
         2.74091361e-002,   2.97302410e-002,   3.46207845e-002,
         3.66260803e-002,   3.66984114e-002,   3.80737231e-002,
         3.97053349e-002,   3.99286471e-002,   4.17194768e-002,
         4.55469666e-002,   4.89062894e-002,   5.00851045e-002,
         6.05448637e-002,   6.61575857e-002])

In [384]:
np.median(w)


Out[384]:
0.0022094572296667706

In [385]:
np.argsort(w)[-15:]


Out[385]:
array([61, 68, 37, 15, 63, 70, 22,  0, 32, 20, 78, 21, 25, 35, 53])

In [386]:
chi2(w, sigma, cv, predictions)


Out[386]:
1.1310003801055741

In [387]:
np.mean(predictions, axis=0)@sigma@np.mean(predictions, axis=0)


Out[387]:
349336.21890748374

In [388]:
effective_number_of_replicas(w)


Out[388]:
33.693561768259315

In [ ]:


In [396]:
%matplotlib inline
import matplotlib.pyplot as plt
from ipywidgets import interact
solvers.options['show_progress'] = False
def show_sol(penalty=0):
    w = get_min_weights(penalty)
    plt.title("$chi^2=%.3f, N_{eff}=%.1f$" % (chi2(w, predictions=predictions, 
                                                   cv=cv, sigma=sigma),
                                              effective_number_of_replicas(w)))
    w*=len(A)
    plt.hist(w)

interact(show_sol, penalty=(0,0.50,0.0001))



In [397]:
#w = get_min_weights(2000)
w = nw
avg = np.average(predictions, weights=w, axis=0)
var = np.average((predictions - avg)**2, weights=w, axis=0)
rwstd = np.sqrt(var)
origstd = np.std(predictions, axis=0)
ratio = rwstd/origstd
plt.hist(ratio)
plt.title(r'$\sigma_{rw}/\sigma_0$ NNPDF')


Out[397]:
<matplotlib.text.Text at 0x7f483f3637b8>

In [409]:
w = get_min_weights(0)
#w = nw
avg = np.average(predictions, weights=w, axis=0)
var = np.average((predictions - avg)**2, weights=w, axis=0)
rwstd = np.sqrt(var)
origstd = np.std(predictions, axis=0)
ratio = rwstd/origstd
plt.hist(ratio)
plt.title(r'$\sigma_{rw}/\sigma_0$ Minimization')


Out[409]:
<matplotlib.text.Text at 0x7f483f8ee518>

In [407]:
w = get_min_weights(0.0107)
effective_number_of_replicas(w)


Out[407]:
34.95041932169898

In [408]:
w = get_min_weights(0.0107)
#w = nw
avg = np.average(predictions, weights=w, axis=0)
var = np.average((predictions - avg)**2, weights=w, axis=0)
rwstd = np.sqrt(var)
origstd = np.std(predictions, axis=0)
ratio = rwstd/origstd
plt.hist(ratio)
plt.title(r'$\sigma_{rw}/\sigma_0$ Minimization With WP')


Out[408]:
<matplotlib.text.Text at 0x7f483f7a34e0>

In [169]:
plt.plot(cv, predictions.mean(axis=0), 'o')
plt.plot(cv, w@predictions, 'o')

plt.plot(cv, cv)
plt.xscale('log')
plt.yscale('log')


Out[169]:
(1e-07, 10000000.0)

In [11]:
%matplotlib qt
import matplotlib.pyplot as plt

In [12]:
plt.plot(predictions.mean(axis=0)/cv, 'o')
plt.plot(w@predictions/cv, 'o')


Out[12]:
[<matplotlib.lines.Line2D at 0x7f4838041278>]

In [72]:
w


Out[72]:
array([  5.00486259e-04,   4.87361116e-04,   1.08870425e-03,
         1.08033101e-01,   7.30367023e-04,   3.29872379e-04,
         5.17924095e-04,   3.40926299e-04,   3.47297778e-04,
         4.80942184e-04,   5.35189484e-04,   5.66683058e-04,
         4.67528242e-04,   9.06750894e-04,   5.78990423e-04,
         9.91329139e-04,   4.91905118e-04,   8.89457121e-05,
         1.16912163e-04,   9.94579626e-02,   5.89079037e-04,
         5.28928228e-03,   9.56105719e-05,   6.91296167e-04,
         5.56540814e-04,   8.15352064e-04,   3.94852844e-04,
         6.55589210e-04,   5.67950597e-04,   5.28232225e-04,
         5.04999366e-04,   5.24003695e-04,   5.21785174e-04,
         9.15699643e-02,   2.92109776e-04,   8.64029787e-04,
         6.40017813e-04,   2.25907622e-03,   5.10905903e-04,
         4.19384684e-04,   5.52341918e-04,   7.04245808e-04,
         5.82070481e-04,   6.02963120e-04,   1.84826990e-04,
         2.55449831e-04,   1.08056162e-03,   7.42146671e-04,
         4.23208068e-04,   4.51645662e-04,   4.32753735e-04,
         1.06921504e-03,   5.34974159e-04,   5.20726624e-04,
         5.53072248e-04,   5.12614944e-04,   3.35429850e-04,
         6.03720772e-04,   4.58384715e-04,   9.08851426e-04,
         5.65432970e-04,   6.32908012e-04,   5.25032153e-04,
         1.84907304e-04,   5.62950152e-04,   5.08577821e-04,
         5.29952459e-04,   4.82621824e-04,   5.62532724e-04,
         3.95211876e-02,   5.97605433e-01,   4.81161828e-04,
         2.80860728e-04,   4.03494628e-04,   3.59450275e-04,
         4.64566288e-04,   3.21726716e-04,   8.87830446e-04,
         1.76093187e-02,   6.51609045e-04])

In [ ]: