In [1]:
import sys
sys.path.append("..")
In [2]:
import numpy as np
import logging
import pickle
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import RobustScaler
from sklearn.utils import check_random_state
from recnn.preprocessing import permute_by_pt
from recnn.preprocessing import extract
from recnn.recnn import grnn_transform_gated
from recnn.recnn import grnn_predict_gated
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (8,8)
In [15]:
# filename_train = "../data/w-vs-qcd/final/antikt-kt-train.pickle"
# filename_test = "../data/w-vs-qcd/final/antikt-kt-test.pickle"
filename_train = "../data/w-vs-qcd/final/antikt-antikt-train.pickle"
filename_test = "../data/w-vs-qcd/final/antikt-antikt-test.pickle"
rng = check_random_state(1)
# Make training data
print("Loading training data...")
fd = open(filename_train, "rb")
X, y = pickle.load(fd)
fd.close()
y = np.array(y)
print("\tfilename = %s" % filename_train)
print("\tX size = %d" % len(X))
print("\ty size = %d" % len(y))
# Preprocessing
print("Preprocessing...")
X = [extract(permute_by_pt(jet)) for jet in X]
tf = RobustScaler().fit(np.vstack([jet["content"] for jet in X]))
for jet in X:
jet["content"] = tf.transform(jet["content"])
# Make test data
print("Loading test data...")
fd = open(filename_test, "rb")
X, y = pickle.load(fd)
fd.close()
y = np.array(y)
print("\tfilename = %s" % filename_test)
print("\tX size = %d" % len(X))
print("\ty size = %d" % len(y))
# Preprocessing
print("Preprocessing...")
X = [extract(permute_by_pt(jet)) for jet in X]
for jet in X:
jet["content"] = tf.transform(jet["content"])
In [16]:
# Loading model
print("Loading model...")
#filename_model = "../models/jet-study-2/model-w-s-antikt-kt-1.pickle"
filename_model = "../models/jet-study-2/model-w-s-antikt-antikt-1.pickle"
fd = open(filename_model, "rb")
params = pickle.load(fd)
fd.close()
print("\tfilename = %s" % filename_model)
In [5]:
from recnn.recnn import grnn_predict_simple
from sklearn.metrics import roc_auc_score
roc_auc_score(y, grnn_predict_simple(params, X))
Out[5]:
In [6]:
import autograd as ag
import autograd.numpy as np
from recnn.recnn import log_loss, relu, sigmoid
def get_branch(tree, target, node_id, is_left):
if target == node_id:
return [(node_id, is_left)]
elif tree[node_id, 0] == -1:
return False
else:
left_result = get_branch(tree, target, tree[node_id, 0], True)
right_result = get_branch(tree, target, tree[node_id, 1], False)
if left_result:
return [(node_id, is_left)] + left_result
elif right_result:
return [(node_id, is_left)] + right_result
else:
return False
def grnn_embeddings(content, tree, params):
embeddings = {}
def _rec(node_id):
u_k = relu(np.dot(params["W_u"], content[node_id].T).T +
params["b_u"]).reshape(1, -1)
if tree[node_id, 0] == -1:
embeddings[node_id] = u_k
return u_k
else:
h_L = _rec(tree[node_id, 0])
h_R = _rec(tree[node_id, 1])
h = relu(np.dot(params["W_h"],
np.hstack((h_L, h_R, u_k)).T).T +
params["b_h"])
embeddings[node_id] = h
return h
_rec(0)
return embeddings
def make_dL_dh(content, tree, params, target):
branch = get_branch(tree, target, 0, True)
embeddings = grnn_embeddings(content, tree, params)
def f(v):
e = v
for i, (node_id, is_left) in enumerate(branch[::-1]):
if i == 0:
e = v
was_left = is_left
elif i > 0:
u_k = relu(np.dot(params["W_u"], content[node_id].T).T +
params["b_u"]).reshape(1, -1)
if was_left:
h_L = e
h_R = embeddings[tree[node_id, 1]]
else:
h_L = embeddings[tree[node_id, 0]]
h_R = e
h = relu(np.dot(params["W_h"],
np.hstack((h_L, h_R, u_k)).T).T +
params["b_h"])
e = h
was_left = is_left
h = e
h = relu(np.dot(params["W_clf"][0], h.T).T + params["b_clf"][0])
h = relu(np.dot(params["W_clf"][1], h.T).T + params["b_clf"][1])
h = sigmoid(np.dot(params["W_clf"][2], h.T).T + params["b_clf"][2])
return h.ravel()
df = ag.grad(f)
return df
def make_dL_du(content, tree, params, target):
branch = get_branch(tree, target, 0, True)
embeddings = grnn_embeddings(content, tree, params)
def f(v):
e = v
for i, (node_id, is_left) in enumerate(branch[::-1]):
if i == 0:
if tree[node_id, 0] == -1:
e = relu(np.dot(params["W_u"], v.T).T +
params["b_u"]).reshape(1, -1)
else:
u_k = relu(np.dot(params["W_u"], v.T).T +
params["b_u"]).reshape(1, -1)
h_L = embeddings[tree[node_id, 0]]
h_R = embeddings[tree[node_id, 1]]
h = relu(np.dot(params["W_h"],
np.hstack((h_L, h_R, u_k)).T).T +
params["b_h"])
e = h
was_left = is_left
elif i > 0:
u_k = relu(np.dot(params["W_u"], content[node_id].T).T +
params["b_u"]).reshape(1, -1)
if was_left:
h_L = e
h_R = embeddings[tree[node_id, 1]]
else:
h_L = embeddings[tree[node_id, 0]]
h_R = e
h = relu(np.dot(params["W_h"],
np.hstack((h_L, h_R, u_k)).T).T +
params["b_h"])
e = h
was_left = is_left
h = e
h = relu(np.dot(params["W_clf"][0], h.T).T + params["b_clf"][0])
h = relu(np.dot(params["W_clf"][1], h.T).T + params["b_clf"][1])
h = sigmoid(np.dot(params["W_clf"][2], h.T).T + params["b_clf"][2])
return h.ravel()
df = ag.grad(f)
return df
In [11]:
from graphviz import Digraph
import matplotlib as mpl
import matplotlib.cm as cm
norm = mpl.colors.Normalize(vmin=0.001, vmax=2.0)
cmap = cm.viridis_r
m = cm.ScalarMappable(norm=norm, cmap=cmap)
def number_to_color(x):
color = m.to_rgba(x)
return "#%.2x%.2x%.2x%.2x" % (int(255*color[0]),
int(255*color[1]),
int(255*color[2]),
int(255*color[3]))
def plot_jet_simple(params, jet, label=None):
content = tf.inverse_transform(jet["content"])
embeddings = grnn_embeddings(jet["content"], jet["tree"], params)
# Build graph recursively
dot = Digraph(graph_attr={"rank": "flow"},
edge_attr={"arrowsize": "0.5", "fontsize": "8.0"},
node_attr={"style": "filled"},
format="png")
df = make_dL_dh(jet["content"], jet["tree"], params, 0)
norm_root = (df(embeddings[0]) ** 2).sum() ** 0.5
if label:
dot.graph_attr["label"] = label
def _rec(jet, parent, node_id):
df = make_dL_du(jet["content"], jet["tree"], params, node_id)
norm_u = (df(jet["content"][node_id]) ** 2).sum() ** 0.5 / norm_root
df = make_dL_dh(jet["content"], jet["tree"], params, node_id)
norm_h = (df(embeddings[node_id]) ** 2).sum() ** 0.5 / norm_root
# Build subgraph
sub = Digraph(graph_attr={"rank": "flow"},
node_attr={"fixedsize": "true",
"label": "",
"height": "0.1",
"width": "0.1",
"style": "filled"},
edge_attr={"arrowsize": "0.5",
"fontsize": "8.0"})
size = "%.4f" % max(0.1, norm_h ** 0.5 / 2.0)
sub.node("%d" % node_id, width=size, height=size, shape="circle",
color=number_to_color(norm_h))
if jet["tree"][node_id, 0] == -1:
size = "%.4f" % max(0.1, norm_u / (2.0 ** 0.5))
sub.node("%d-N" % node_id, width=size, height=size, shape="diamond",
color=number_to_color(norm_u))
sub.edge("%d-N" % node_id, "%d" % node_id,
color=number_to_color(norm_u),
#label="o_%d" % (1+node_id)
)#label="%.4f" % norm_u)
dot.subgraph(sub)
# Connect to parent
if parent >= 0:
dot.edge("%d" % node_id, "%d" % parent,
color=number_to_color(norm_h),
#label="h_%d" % (1+node_id)
)
#label="%.4f" % norm_h)
# Recursive calls
if jet["tree"][node_id, 0] != -1:
_rec(jet, node_id, jet["tree"][node_id, 0])
_rec(jet, node_id, jet["tree"][node_id, 1])
_rec(jet, -1, jet["root_id"])
return dot
In [17]:
jet_id = np.argmin([len(j["tree"]) if len(j["tree"]) > 27 else np.inf for j in X])
print(jet_id)
dot = plot_jet_simple(params, X[jet_id],)
# label="y=%d, y_pred=%.4f" % (y[jet_id],
# grnn_predict_simple(params, [X[jet_id]])[0]))
In [18]:
dot
Out[18]:
In [19]:
dot.save("jet-antikt-%d.gv" % jet_id)
Out[19]:
In [32]:
np.random.seed(123)
for jet_id in np.random.permutation(len(X))[:20]:
dot = plot_jet_simple(params, X[jet_id],
label="y=%d, y_pred=%.4f" % (y[jet_id],
grnn_predict_simple(params, [X[jet_id]])[0]))
dot.render("figures/%d-anti-kt" % jet_id)