In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
from wb import WeightsAndBiases
from sklearn.preprocessing import LabelBinarizer
from random import sample, choice
from fingerprint_vect import GraphFingerprint
from collections import defaultdict
from autograd import grad
from autograd.scipy.misc import logsumexp
import autograd.numpy as np
import networkx as nx
import math
In [3]:
def make_random_graph(nodes, n_edges, features_dict):
    """
    Makes a graph on the given nodes with n_edges randomly placed edges
    (not guaranteed to be connected).
    """
    G = nx.Graph()
    for n in nodes:
        G.add_node(n, features=features_dict[n])
    for i in range(n_edges):
        u, v = sample(G.nodes(), 2)
        G.add_edge(u, v)
    return G
In [4]:
# features_dict will look like this:
# {0: array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
# 1: array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0]),
# 2: array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0]),
# 3: array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0]),
# 4: array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0]),
# 5: array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0]),
# 6: array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0]),
# 7: array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0]),
# 8: array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0]),
# 9: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1])}
all_nodes = list(range(10))
lb = LabelBinarizer()
binarized = lb.fit_transform(all_nodes)
features_dict = {i: binarized[i] for i in all_nodes}

G = make_random_graph(sample(all_nodes, 6), 5, features_dict)
G.edges(data=True)
# G.nodes(data=True)
Out[4]:
In [5]:
wb = WeightsAndBiases(n_layers=2, shapes=(10, 20, 10))
# wb
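The `wb` module is local to this project and is not shown in this notebook. From how the parameters are indexed in the cells below, I assume the unflattened weights behave like a nested dict of numpy arrays: each hidden layer carries 'self_weights', 'nbr_weights', and 'biases', and the top layer carries 'linweights' for the final linear readout. A quick, assumption-checking peek at that layout might look like this:

# Inspect the assumed per-layer parameter names and shapes (assumes dict-like access).
params = wb.unflattener(wb.vect)
for layer in sorted(params.keys()):
    print(layer, {name: w.shape for name, w in params[layer].items()})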
In [6]:
# def score(G):
#     """
#     The regressable score for each graph would be the sum, over nodes, of
#     the square root of the node label plus the cube roots of its
#     neighbors' labels.
#     """
#     sum_score = 0
#     for n, d in G.nodes(data=True):
#         sum_score += math.sqrt(n)
#         for nbr in G.neighbors(n):
#             sum_score += nbr ** (1/3)
#     return sum_score

def score(G):
    """
    The regressable score for each graph is the number of nodes
    in the graph.
    """
    return len(G.nodes())

score(G)
Out[6]:
In [7]:
G.nodes(data=True)[0][1]['features'].shape
Out[7]:
In [8]:
def softmax(X, axis=0):
    """
    Exponentiates and normalizes X so that each slice along `axis` sums to 1.
    """
    return np.exp(X - logsumexp(X, axis=axis, keepdims=True))

# test softmax:
X = np.random.random((1, 10))
softmax(X, axis=1)
Out[8]:
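As a quick sanity check on the logsumexp formulation (subtracting the log-normalizer keeps the exponentials numerically stable), the softmax outputs along the chosen axis should sum to 1:

# Each row of the softmaxed matrix should sum to 1.
np.allclose(softmax(X, axis=1).sum(axis=1), 1.0)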
In [9]:
def relu(X):
    """
    The ReLU - Rectified Linear Unit.
    """
    return X * (X > 0)

# test relu:
X = np.random.normal(0, 1, size=(5, 1))
print(X)
print('')
print(relu(X))
In [45]:
# Make 20 random graphs of varying size and edge count.
syngraphs = []
for i in range(20):
    n_nodes = choice([i for i in range(2, 10)])
    n_edges = choice([i for i in range(1, n_nodes ** 2)])
    G = make_random_graph(sample(all_nodes, n_nodes), n_edges, features_dict)
    syngraphs.append(G)

len(syngraphs)
Out[45]:
In [31]:
# Write a function that computes the stacked feature matrix, and writes the
# row indices back onto the nodes of each graph.
def stacked_node_activations(graphs):
    """
    Note: this function should only be called to compute the initial
    stacked node activations, after the graphs have been created.

    Inputs:
    =======
    - graphs: (list) a list of graphs whose node feature vectors are
              stacked row-wise.
    """
    features = []
    curr_idx = 0
    for g in graphs:
        for n, d in g.nodes(data=True):
            features.append(d['features'])
            g.node[n]['idx'] = curr_idx
            curr_idx += 1
    return np.vstack(features)

# test stacked_node_activations
layers = dict()
layers[0] = stacked_node_activations(syngraphs)
layers[1] = stacked_node_activations(syngraphs)
# layers[1]
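Because `stacked_node_activations` writes an 'idx' attribute onto every node, a node's row in `layers[0]` can be looked up directly. A small consistency check on the first synthetic graph:

# The stored row index should point back at the node's own feature vector.
g = syngraphs[0]
n = g.nodes()[0]
np.array_equal(layers[0][g.node[n]['idx']], g.node[n]['features'])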
In [32]:
# Write a function that gets the indices of each node's neighbors.
def neighbor_indices(G, n):
    """
    Inputs:
    =======
    - G: the graph to which the node belongs.
    - n: the node inside the graph G.

    Returns:
    ========
    - indices: (list) a list of indices, which should (but is not
               guaranteed to) correspond to rows in the large
               stacked matrix of features.
    """
    indices = []
    for nbr in G.neighbors(n):
        indices.append(G.node[nbr]['idx'])
    return indices

# test neighbor_indices
nbr_idxs = neighbor_indices(syngraphs[0], syngraphs[0].nodes()[0])
nbr_idxs
Out[32]:
In [33]:
# Write a function that sums the neighbors' activations for a
# given node in a given graph.
def neighbor_activations(G, n, activations_dict, layer):
    """
    Inputs:
    =======
    - G: the graph to which the node belongs.
    - n: the node inside the graph G.
    - activations_dict: a dictionary that stores the node activations
                        at each layer.
    - layer: the layer at which to compute neighbor activations.
    """
    nbr_indices = neighbor_indices(G, n)
    return np.sum(activations_dict[layer][nbr_indices], axis=0)

neighbor_activations(syngraphs[0], syngraphs[0].nodes()[0], layers, 0)
Out[33]:
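At layer 0 the activations are just the one-hot node features, so the neighbor activation of a node should equal the elementwise sum of its neighbors' feature vectors. A small check (assuming the chosen node has at least one neighbor):

# Compare the indexed-and-summed activations against a manual sum over neighbors.
g = syngraphs[0]
n = g.nodes()[0]
manual_sum = np.sum([g.node[nbr]['features'] for nbr in g.neighbors(n)], axis=0)
np.array_equal(neighbor_activations(g, n, layers, 0), manual_sum)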
In [34]:
# Write a function that stacks every node's neighbor activations
# together into a large feature matrix.
def stacked_neighbor_activations(graphs, activations_dict, layer):
    """
    Inputs:
    =======
    - graphs: (list) a list of NetworkX graphs.
    - activations_dict: (dict) a dictionary where keys are the layer
                        number and values are the node activations.
    - layer: (int) the layer at which to compute neighbor activations.

    Returns:
    ========
    - a stacked numpy array of neighbor activations, one row per node.
    """
    nbr_activations = []
    for g in graphs:
        for n in g.nodes():
            nbr_acts = neighbor_activations(g, n, activations_dict, layer)
            nbr_activations.append(nbr_acts)
    return np.vstack(nbr_activations)

# stacked_neighbor_activations(syngraphs, layers, 1)
In [46]:
# Write a function that computes the next layer's activations.
def activation(activations_dict, wb, layer, graphs):
    """
    Inputs:
    =======
    - activations_dict: (dict) a dictionary where keys are the layer
                        number and values are the node activations.
    - wb: (wb.WeightsAndBiases) the WB class storing the weights and
          biases.
    - layer: (int) the layer for which to compute the activations.
    - graphs: (list) the graphs whose nodes are stacked in the
              activations.

    Returns:
    ========
    - a stacked numpy array of activations, which should be assigned to
      the next layer of activations_dict.
    """
    self_acts = activations_dict[layer]
    self_acts = np.dot(self_acts, wb[layer]['self_weights'])
    nbr_acts = stacked_neighbor_activations(graphs, activations_dict, layer)
    nbr_acts = np.dot(nbr_acts, wb[layer]['nbr_weights'])
    biases = wb[layer]['biases']
    return relu(self_acts + nbr_acts + biases)

print(activation(layers, wb, 0, syngraphs))
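In other words, each layer update is relu(A @ W_self + N @ W_nbr + b), where A stacks the current node activations and N stacks the summed neighbor activations, so the output keeps one row per node and changes width to the layer's output size. Given shapes=(10, 20, 10), I would expect layer 0 to map 10 columns to 20:

# Shape check for the layer-0 update (expected: same row count, 10 -> 20 columns).
acts_1 = activation(layers, wb, 0, syngraphs)
print(layers[0].shape, '->', acts_1.shape)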
In [36]:
act = np.dot(stacked_neighbor_activations(syngraphs, layers, 0), wb[0]['nbr_weights']) + wb[0]['biases']
act.shape
Out[36]:
In [37]:
# Write a function that gets the row indices of all of the nodes in a
# graph.
def graph_indices(g):
    """
    Returns the row indices of each of the nodes in the graph.
    """
    return [d['idx'] for _, d in g.nodes(data=True)]
In [38]:
# Write a function that makes the fingerprint used for predictions.
def fingerprint(activations_dict, graphs):
    """
    Computes the final-layer fingerprint for each graph.

    Inputs:
    =======
    - activations_dict: (dict) a dictionary where keys are the layer
                        number and values are the node activations.
    - graphs: a list of graphs for which to compute the fingerprints.

    Returns:
    ========
    - a stacked numpy array of fingerprints, with one row per graph.
    """
    top_layer = max(activations_dict.keys())
    fingerprints = []
    for g in graphs:
        idxs = graph_indices(g)
        fp = np.sum(activations_dict[top_layer][idxs], axis=0)
        fingerprints.append(fp)
    return relu(np.vstack(fingerprints))

# test fingerprint function
fingerprint(layers, syngraphs)
Out[38]:
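Each fingerprint is the ReLU of the sum of a graph's node activations at the topmost layer currently stored, so the stacked result should have one row per graph:

# Expected: (len(syngraphs), width of the top activation layer).
fingerprint(layers, syngraphs).shape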
In [39]:
# Write a function that makes the forward pass predictions.
def predict(wb_vect, wb_unflattener, activations_dict, graphs):
    """
    Makes predictions via a full forward pass.

    Change this function for each new learning problem.

    Inputs:
    =======
    - wb_vect: (WeightsAndBiases.vect)
    - wb_unflattener: (WeightsAndBiases.unflattener)
    - activations_dict: (dict) a dictionary where keys are the layer
                        number and values are the node activations.
    - graphs: a list of graphs for which to compute the fingerprints.

    Returns:
    ========
    - a numpy array of predictions, of length len(graphs).
    """
    wb = wb_unflattener(wb_vect)
    for k in sorted(wb.keys()):
        activations_dict[k + 1] = activation(activations_dict, wb, k, graphs)
        # print(activations_dict[k])
    top_layer = max(wb.keys())
    fps = fingerprint(activations_dict, graphs)
    return np.dot(fps, wb[top_layer]['linweights'])

predict(*wb.flattened(), layers, syngraphs)
Out[39]:
In [40]:
# Write a function that computes the training loss.
def train_loss(wb_vect, wb_unflattener, activations_dict, graphs):
    """
    Computes the training loss as the mean squared error.

    Inputs:
    =======
    - wb_vect: (WeightsAndBiases.vect)
    - wb_unflattener: (WeightsAndBiases.unflattener)
    - activations_dict: (dict) a dictionary where keys are the layer
                        number and values are the node activations.
    - graphs: a list of graphs for which to compute the fingerprints.

    Returns:
    ========
    - the mean squared error between predictions and true scores.
    """
    scores = np.array([score(g) for g in graphs]).reshape((len(graphs), 1))
    # print(scores)
    preds = predict(wb_vect, wb_unflattener, activations_dict, graphs)
    # print(preds)
    return np.sum(np.power(preds - scores, 2)) / len(scores)

train_loss(wb.vect, wb.unflattener, layers, syngraphs)
Out[40]:
In [41]:
gradfunc = grad(train_loss, argnum=0)
gradfunc(wb.vect, wb.unflattener, layers, syngraphs)
Out[41]:
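Since the loss is an ordinary scalar function of wb_vect, the autograd gradient can be spot-checked against a central finite difference on a few coordinates. A minimal sketch (the tolerance is a loose, illustrative choice, and ReLU kinks can make individual coordinates disagree slightly):

def finite_difference(vect, idx, eps=1e-4):
    """Central finite difference of train_loss along coordinate `idx`."""
    e = np.zeros(len(vect))
    e[idx] = eps
    hi = train_loss(vect + e, wb.unflattener, layers, syngraphs)
    lo = train_loss(vect - e, wb.unflattener, layers, syngraphs)
    return (hi - lo) / (2 * eps)

g = gradfunc(wb.vect, wb.unflattener, layers, syngraphs)
[np.allclose(g[i], finite_difference(wb.vect, i), atol=1e-3) for i in range(3)]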
In [42]:
def sgd(grad, wb_vect, wb_unflattener, activations_dict, graphs,
        callback=None, num_iters=200, step_size=0.1, mass=0.9):
    """
    Gradient descent with momentum. (The name `sgd` is kept, but every
    iteration uses the full batch of graphs rather than a random subset.)
    """
    from time import time
    velocity = np.zeros(len(wb_vect))
    for i in range(num_iters):
        start = time()
        print(i)
        g = grad(wb_vect, wb_unflattener, activations_dict, graphs)
        velocity = mass * velocity - (1.0 - mass) * g
        wb_vect += step_size * velocity
        # print(wb_vect)
        print(train_loss(wb_vect, wb_unflattener, activations_dict, graphs))
        end = time()
        print('Epoch Time: {0}'.format(end - start))
    return wb_vect, wb_unflattener

wb_vect, wb_unflattener = sgd(gradfunc, wb.vect, wb.unflattener, layers, syngraphs,
                              num_iters=200, step_size=0.001)
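As written, `sgd` uses every graph at every iteration, so it is really full-batch gradient descent with momentum. A genuinely stochastic variant would draw a minibatch of graphs per step and re-stack their features so the 'idx' attributes line up with the minibatch matrix. A rough, untested sketch of that variant:

def sgd_minibatch(grad, wb_vect, wb_unflattener, graphs, batch_size=5,
                  num_iters=200, step_size=0.001, mass=0.9):
    """
    Minibatch gradient descent with momentum (hypothetical variant).
    """
    velocity = np.zeros(len(wb_vect))
    for i in range(num_iters):
        batch = sample(graphs, batch_size)
        # Re-stacking rewrites the 'idx' attribute on the batch's nodes so that
        # they index rows of the minibatch feature matrix.
        batch_layers = {0: stacked_node_activations(batch)}
        g = grad(wb_vect, wb_unflattener, batch_layers, batch)
        velocity = mass * velocity - (1.0 - mass) * g
        wb_vect += step_size * velocity
    return wb_vect, wb_unflattener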
In [23]:
train_loss(wb_vect, wb.unflattener, layers, syngraphs)
Out[23]:
In [24]:
wb.unflattener(wb.vect)[2]['linweights']
Out[24]:
In [25]:
scores = [score(g) for g in syngraphs]
In [26]:
preds = predict(wb_vect, wb.unflattener, layers, syngraphs)
In [27]:
[i for i in zip(scores, preds)]
Out[27]:
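A compact summary of the fit on the training graphs, rather than eyeballing the zipped pairs, is the mean absolute error:

# Mean absolute error between predictions and true scores on the training graphs.
np.mean(np.abs(np.array(preds).flatten() - np.array(scores)))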
In [28]:
new_graphs = [make_random_graph(sample(all_nodes, 4), 5, features_dict) for i in range(100)]
# predict(wb_vect, wb.unflattener, layers, new_graphs)
new_graphs[0].nodes(data=True)
# Re-stack the node features so that the new graphs' 'idx' attributes point at
# rows of layers[0]; without this, predict() below would index into the old matrix.
layers[0] = stacked_node_activations(new_graphs)
layers[0].shape
Out[28]:
In [29]:
predict(wb_vect, wb.unflattener, layers, new_graphs)
In [ ]: