In [1]:
from __future__ import division
from sys import stdout
import numpy as np
import networkx as nx
import sklearn.metrics as met
from sklearn.metrics.pairwise import euclidean_distances
import matplotlib.pyplot as plt
from itertools import repeat
from Queue import Queue
from threading import Thread
from threading import current_thread
MIN_EXPANSION_SIZE = 50
MAX_DELETED_NEURONS = 3
#########################################################################################################################
## function to visualise graph
def visualise_graph(G, colours, layer):
    ## create new figure for graph plot
    fig, ax = plt.subplots()
    # graph layout
    pos = nx.spring_layout(G)
    # attributes in this graph
    attributes = np.unique([v for k, v in nx.get_node_attributes(G, "assigned_community_layer_{}".format(layer)).items()])
    # draw nodes -- colouring by cluster
    for i in range(min(len(colours), len(attributes))):
        node_list = [n for n in G.nodes() if G.node[n]["assigned_community_layer_{}".format(layer)] == attributes[i]]
        colour = [colours[i] for n in range(len(node_list))]
        nx.draw_networkx_nodes(G, pos, nodelist=node_list, node_color=colour)
    # draw edges
    nx.draw_networkx_edges(G, pos)
    # draw labels
    nx.draw_networkx_labels(G, pos)
    # title of plot
    plt.title('Nodes coloured by cluster, layer: {}'.format(layer))
    # show plot
    plt.show()
## function to visualise the neuron lattice, colouring by cluster
def visualise_network(network, colours, layer):
    # num neurons in lattice
    num_neurons = len(network)
    ## create new figure for lattice plot
    fig, ax = plt.subplots()
    # graph layout
    pos = nx.spring_layout(network)
    # draw nodes -- colouring by cluster (guard against more colours than neurons)
    for i in range(min(num_neurons, len(colours))):
        nx.draw_networkx_nodes(network, pos, nodelist=[network.nodes()[i]], node_color=colours[i])
    # draw edges
    nx.draw_networkx_edges(network, pos)
    # draw labels
    nx.draw_networkx_labels(network, pos)
    # title of plot
    plt.title('Neurons in lattice, layer: {}'.format(layer))
    # show lattice plot
    plt.show()
##########################################################################################################################
def initialise_weight(G):
    print "INITIALISE WEIGHT, NUMBER OF NODES: {}, CONNECTED: {}".format(len(G), nx.is_connected(G))
    # pick a random edge (n, m) of G and a random position beta along it
    n = G.nodes()[np.random.randint(len(G))]
    neighbours = G.neighbors(n)
    m = neighbours[np.random.randint(len(neighbours))]
    return n, m, np.random.rand()
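## illustrative sanity check (editor's sketch, not part of the original run; assumes a
## toy path graph and networkx 1.x): a weight is a triple (vi, vj, beta) encoding a
## point a fraction beta of the way along the edge (vi, vj) of the input graph
# toy = nx.path_graph(5)
# print initialise_weight(toy)    # e.g. (2, 3, 0.417...)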
# function to generate a real-valued SOM for graph input
# three initial neurons by default
def initialise_network(ID, G, starting_nodes=3):
    # the network is a small fully connected lattice of neurons
    network = nx.Graph(ID=ID)
    # initialise the network with the starting neurons
    network.add_nodes_from(range(1, starting_nodes + 1))
    # id of nodes
    for n in network.nodes():
        network.node[n]["ID"] = "{}-{}".format(ID, str(n).zfill(2))
    # connect nodes
    for i in range(1, starting_nodes + 1):
        for j in range(i + 1, starting_nodes + 1):
            network.add_edge(i, j)
    # one weight per neuron
    V = np.array([initialise_weight(G) for i in range(starting_nodes)])
    # return network and weights
    return network, V
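## illustrative sketch (editor's addition, toy graph assumed): the default starting
## lattice is a triangle of three neurons, each holding one edge-position weight
# toy = nx.path_graph(5)
# net, V = initialise_network("00", toy)
# print net.edges()    # e.g. [(1, 2), (1, 3), (2, 3)]
# print V.shape        # (3, 3): one (vi, vj, beta) triple per neuron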
#########################################################################################################################
def precompute_sigmas(sigma, num_epochs):
    return np.array([sigma * np.exp(-2 * sigma * e / num_epochs)
                     for e in range(num_epochs)])
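## illustrative sketch (editor's addition): the neighbourhood width decays
## exponentially with the epoch index, e.g. sigma=1.0 over 5 epochs gives
## approximately [1.0, 0.67, 0.45, 0.30, 0.20]
# print precompute_sigmas(1.0, 5)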
##########################################################################################################################
# function to train SOM on given graph
def train_network(G, network, V, num_epochs, eta, precomputed_sigmas,
                  precomputed_graph_shortest_paths, precomputed_graph_shortest_path_lengths):
    # list of all patterns to visit
    training_patterns = range(len(G))
    # shortest path matrix of the neuron lattice
    shortest_path = np.array(nx.floyd_warshall_numpy(network))
    net_change = np.zeros(len(network))
    for e in range(num_epochs):
        # shuffle nodes
        np.random.shuffle(training_patterns)
        sigma = precomputed_sigmas[e]
        # iterate through N nodes of graph
        for i in training_patterns:
            # data point to consider
            x = G.nodes()[i]
            # determine winning neuron
            closest_neuron = winning_neuron(x, V, precomputed_graph_shortest_path_lengths)
            # update weights
            V, deltaV = update_weights(x, V, closest_neuron, shortest_path[closest_neuron], eta, sigma,
                                       precomputed_graph_shortest_paths, precomputed_graph_shortest_path_lengths)
            net_change += deltaV
    print "NET CHANGE AFTER TRAINING"
    print net_change
    return V
##########################################################################################################################
def distance(x, v, precomputed_graph_shortest_path_lengths):
    # v = (vi, vj, beta) lies a fraction beta along the unit-length edge (vi, vj);
    # reach x through whichever endpoint is closer
    beta = np.float(v[2])
    return min(precomputed_graph_shortest_path_lengths[v[0]][x] + beta,
               precomputed_graph_shortest_path_lengths[v[1]][x] + 1 - beta)
# winning neuron
def winning_neuron(x, V, precomputed_graph_shortest_path_lengths):
    distances = np.array([distance(x, v, precomputed_graph_shortest_path_lengths) for v in V])
    return distances.argmin()
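## worked example (editor's sketch on a toy path graph 0-1-2-3-4): a weight halfway
## along the edge (1, 2) can reach node 4 through vi=1 (3 + 0.5 hops) or through
## vj=2 (2 + 0.5 hops), so its distance to 4 is 2.5
# toy = nx.path_graph(5)
# lengths = nx.shortest_path_length(toy)   # dict of dicts in networkx 1.x
# print distance(4, (1, 2, 0.5), lengths)  # min(3.5, 2.5) = 2.5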
##########################################################################################################################
# function to update weights
def update_weights(x, V, winning_neuron, shortest_path_length, eta, sigma,
                   precomputed_graph_shortest_paths, precomputed_graph_shortest_path_lengths):
    # step size per neuron: learning rate * Gaussian neighbourhood of the winner
    # (over the lattice) * distance to x (vectorised)
    deltaV = eta * np.exp(-shortest_path_length ** 2 / (2 * sigma ** 2)) * \
        np.array([distance(x, v, precomputed_graph_shortest_path_lengths) for v in V])
    # move each weight deltaV along the shortest path towards x
    V = np.array([move_along_shortest_path(x, v, deltav, precomputed_graph_shortest_paths,
                                           precomputed_graph_shortest_path_lengths) for v, deltav in zip(V, deltaV)])
    return V, deltaV
##########################################################################################################################
def move_along_shortest_path(x, v, deltaV, precomputed_graph_shortest_paths,
                             precomputed_graph_shortest_path_lengths):
    # unpack v
    vi, vj, beta = v
    beta = np.float(beta)
    # weight of edge between vi and vj
    weight = precomputed_graph_shortest_path_lengths[vi][vj]
    # check if passing through vi or vj is quicker
    if precomputed_graph_shortest_path_lengths[x][vi] + beta * weight < \
       precomputed_graph_shortest_path_lengths[x][vj] + (1 - beta) * weight:
        # passing through vi is quicker -- reverse the edge orientation
        beta = 1 - beta
        path = [vj, vi]
        path.extend(precomputed_graph_shortest_paths[vi][x])
    else:
        # passing through vj is quicker
        path = [vi, vj]
        path.extend(precomputed_graph_shortest_paths[vj][x])
    # move along path until deltaV is less than the remaining length of the current edge
    for vi, vj in zip(path, path[1:]):
        # edge weight
        weight = precomputed_graph_shortest_path_lengths[vi][vj]
        if deltaV < weight * (1 - beta):
            return vi, vj, beta + deltaV / weight
        # consume the remainder of this edge and step to the next one
        deltaV -= weight * (1 - beta)
        beta = 0
    # walked the whole path: the weight lands exactly on x
    return x, x, 0
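## sanity check (editor's sketch, toy graph assumed): on the path 0-1-2-3-4, a
## weight halfway along (1, 2) moved a distance of 1.0 towards x=4 should land
## halfway along (2, 3)
# toy = nx.path_graph(5)
# paths = nx.shortest_path(toy)
# lengths = nx.shortest_path_length(toy)
# print move_along_shortest_path(4, (1, 2, 0.5), 1.0, paths, lengths)  # (2, 3, 0.5)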
########################################################################################################################
# assign nodes to clusters
def assign_nodes(G, network, V, precomputed_graph_shortest_path_lengths):
    # distance from each datapoint (row) to each weight vector (column)
    distances = np.array([[distance(x, v, precomputed_graph_shortest_path_lengths) for v in V] for x in G.nodes()])
    # index of column giving minimum distance
    arg_min_distances = np.argmin(distances, axis=1)
    # minimum distance for each datapoint
    min_distances = np.min(distances, axis=1)
    # neurons corresponding to minimum index (of length len(G))
    minimum_nodes = np.array([network.nodes()[n] for n in arg_min_distances])
    # list of neurons with no assignments
    empty_neurons = np.array([n for n in network.nodes() if n not in minimum_nodes])
    if empty_neurons.size > 0:
        ################################################DELETION####################################################
        print "DELETING NODES: {}".format(empty_neurons)
        # neighbours of deleted neurons
        neighbour_lists = np.array([network.neighbors(n) for n in empty_neurons])
        # remove the nodes
        network.remove_nodes_from(empty_neurons)
        ## remove the corresponding weights from V
        V = np.array([V[i] for i in range(len(V)) if i in arg_min_distances])
        # compute distances between all neurons in input space
        computed_neuron_distances = compute_neuron_distances(network, V, precomputed_graph_shortest_path_lengths)
        ## reconnect any components separated by the deletion
        for neighbour_list in neighbour_lists:
            connect_components(network, neighbour_list, computed_neuron_distances)
        ############################################################################################################
    # array of errors (mean distance of each neuron's assigned nodes)
    errors = np.array([np.mean(min_distances[minimum_nodes == n]) for n in network.nodes()])
    # compute MQE (mean quantisation error)
    MQE = np.mean(errors)
    # array of assignments
    assignments = np.array([np.array([G.nodes()[i] for i in np.where(minimum_nodes == n)[0]]) for n in network.nodes()])
    # zip with nodes
    errors = {n: e for n, e in zip(network.nodes(), errors)}
    assignments = {n: a for n, a in zip(network.nodes(), assignments)}
    nx.set_node_attributes(network, "e", errors)
    nx.set_node_attributes(network, "ls", assignments)
    return MQE, empty_neurons.size, V
##########################################################################################################################
def neuron_distances(v1, v2, precomputed_graph_shortest_path_lengths):
    beta = np.float(v1[2])
    return min(distance(v1[0], v2, precomputed_graph_shortest_path_lengths) + beta,
               distance(v1[1], v2, precomputed_graph_shortest_path_lengths) + 1 - beta)
def compute_neuron_distances(network, V, precomputed_graph_shortest_path_lengths):
    distances = np.array([[neuron_distances(v1, v2, precomputed_graph_shortest_path_lengths) for v2 in V] for v1 in V])
    return {network.nodes()[i]: {network.nodes()[j]: distances[i, j] for j in range(len(distances[i]))}
            for i in range(len(distances))}
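## worked example (editor's sketch, toy graph assumed): the input-space distance
## between a weight halfway along (1, 2) and one halfway along (3, 4) on the path
## 0-1-2-3-4 is 0.5 + 1 + 0.5 = 2.0
# toy = nx.path_graph(5)
# lengths = nx.shortest_path_length(toy)
# print neuron_distances((1, 2, 0.5), (3, 4, 0.5), lengths)  # 2.0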
#########################################################################################################################
def connect_components(network, neighbour_list, computed_neuron_distances):
    sub_network = network.subgraph(neighbour_list)
    connected_components = [sub_network.subgraph(c) for c in nx.connected_components(sub_network)]
    number_of_connected_components = len(connected_components)
    for i in range(number_of_connected_components):
        connected_component_1 = connected_components[i].nodes()
        for j in range(i + 1, number_of_connected_components):
            connected_component_2 = connected_components[j].nodes()
            distances = np.array([[computed_neuron_distances[n1][n2] for n2 in connected_component_2]
                                  for n1 in connected_component_1])
            min_n1, min_n2 = np.unravel_index(distances.argmin(), distances.shape)
            network.add_edge(connected_component_1[min_n1],
                             connected_component_2[min_n2])
##########################################################################################################################
## function to identify the neuron with greatest error (ignoring neurons with a single assignment)
def identify_error_unit(network):
    number_of_assignments = {k: len(v) for k, v in nx.get_node_attributes(network, "ls").items()}
    errors = nx.get_node_attributes(network, "e")
    errors = {k: v for k, v in errors.items() if number_of_assignments[k] > 1}
    return max(errors, key=errors.get)
##########################################################################################################################
def expand_network(ID, G, network, V, error_unit, precomputed_graph_shortest_path_lengths):
    # new weight goes to a random position within the receptive field of the error unit
    ls = network.node[error_unit]["ls"]
    # weight of new neuron
    w = initialise_weight(G.subgraph(ls))
    # zip nodes with their distances to the new weight
    distances = zip(network.nodes(), np.array([neuron_distances(w, v,
                    precomputed_graph_shortest_path_lengths) for v in V]))
    # neighbours of the error unit
    error_unit_neighbours = network.neighbors(error_unit)
    # id of new node
    new_node = max(network) + 1
    # add new node to map
    network.add_node(new_node)
    ## id
    network.node[new_node]["ID"] = "{}-{}".format(ID, str(new_node).zfill(2))
    # add edges to map
    # connect error unit and new node
    network.add_edge(error_unit, new_node)
    if len(error_unit_neighbours) > 0:
        ## find the neighbour pointing closest to the new weight
        distances = {n: v for n, v in distances if n in error_unit_neighbours}
        closest_neighbour = min(distances, key=distances.get)
        # connect the new node to that neighbour as well
        network.add_edge(closest_neighbour, new_node)
    # add w to V
    V = np.vstack([V, w])
    return V
##########################################################################################################################
##########################################################################################################################
## GHSOM algorithm
def ghsom(ID, G, num_iter, eta, sigma, e_0, e_sg, e_en, q):
    print "MQE_0={}, growth_target={}".format(e_0, e_0 * e_sg)
    precomputed_graph_shortest_paths = nx.shortest_path(G)
    # hop-count distances (nx.shortest_path_length returns a dict of dicts here);
    # note a path of k nodes has length k - 1, so taking len(path) would overcount by one
    precomputed_graph_shortest_path_lengths = nx.shortest_path_length(G)
print "computed shortest paths"
if e_0 == np.inf:
starting_nodes = 1
else:
starting_nodes = 3
#create som for this neuron
network, V = initialise_network(ID, G, starting_nodes=starting_nodes)
print "initialised network"
#precompute sigmas
precomputed_sigmas = precompute_sigmas(sigma, num_iter)
print "precomputed sigmas"
#train for lamda epochs
V = train_network(G, network, V, num_iter, eta, precomputed_sigmas, precomputed_graph_shortest_paths,
precomputed_graph_shortest_path_lengths)
print "trained network"
#classify nodes and compute error
MQE, num_deleted_neurons, V = assign_nodes(G, network, V, precomputed_graph_shortest_path_lengths)
print "assigned nodes MQE={}, target={}".format(MQE, e_0 * e_sg)
##som growth phase
#repeat until error is low enough
while MQE > e_sg * e_0 and num_deleted_neurons < MAX_DELETED_NEURONS:
#find neuron with greatest error
error_unit = identify_error_unit(network)
print "identified error unit: {}".format(error_unit)
#expand network
V = expand_network(ID, G, network, V, error_unit, precomputed_graph_shortest_path_lengths)
print "expanded network"
#train for lam epochs
V = train_network(G, network, V, num_iter, eta, precomputed_sigmas, precomputed_graph_shortest_paths,
precomputed_graph_shortest_path_lengths)
print "trained network"
#calculate mean network error
MQE, deleted_neurons, V = assign_nodes(G, network, V, precomputed_graph_shortest_path_lengths)
num_deleted_neurons += deleted_neurons
print "assigned nodes MQE={}, target={}".format(MQE, e_0 * e_sg)
print "growth terminated, MQE: {}, target: {}, number of deleted neurons: {}".format(MQE, e_0 * e_sg, num_deleted_neurons)
    ## neuron expansion phase
    # iterate through all neurons and find those with error great enough to expand
    for _, d in network.nodes(data=True):
        # unpack
        node_id = d["ID"]
        ls = d["ls"]
        e = d["e"]
        # check error
        if e_0 == np.inf or (e > e_en * e_0 and len(ls) > MIN_EXPANSION_SIZE and num_deleted_neurons < MAX_DELETED_NEURONS):
            if e_0 == np.inf:
                node_id = "01"
            H = G.subgraph(ls)
            print "submitted job: ID={}, e={}, number of nodes={}".format(node_id, e, len(ls))
            # add these parameters to the queue
            q.put((node_id, H, num_iter, eta, sigma, e, e_sg, e_en))
    # return network
    return network, MQE
##########################################################################################################################
##########################################################################################################################
def label_nodes(G, networks):
    # write each network's community IDs back onto the nodes of G
    for _, network, _ in networks:
        for _, d in network.nodes(data=True):
            community = d["ID"]
            # the layer is encoded by the number of '-' separators in the ID
            layer = community.count("-")
            assignment_string = "assigned_community_layer_{}".format(layer)
            for node in d["ls"]:
                G.node[node][assignment_string] = community
##########################################################################################################################
def NMI_one_layer(G, label, layer):
    # actual community for this layer
    actual_community_labels = np.array([v for k, v in nx.get_node_attributes(G, label).items()])
    # predicted community for this layer
    predicted_community_labels = np.array([v for k, v in nx.get_node_attributes(G,
                                           "assigned_community_layer_{}".format(layer)).items()])
    print actual_community_labels
    print predicted_community_labels
    return met.normalized_mutual_info_score(actual_community_labels, predicted_community_labels)
def NMI_all_layers(G, labels):
    return np.array([NMI_one_layer(G, labels[i], i + 1) for i in range(len(labels))])
##########################################################################################################################
## get embedding from node attributes
def get_embedding(G):
    return np.array([v for k, v in nx.get_node_attributes(G, "embedding").items()])
##########################################################################################################################
def process_job(q, networks):
    # unpack the first element of the queue
    # it contains all the parameters for GHSOM
    ID, G, num_iter, eta, sigma, e_0, e_sg, e_en = q.get()
    # run GHSOM, returning a network and its MQE
    n, e = ghsom(ID, G, num_iter, eta, sigma, e_0, e_sg, e_en, q)
    # append result to networks list
    networks.append((ID, n, e))
    # mark task as done
    q.task_done()
def worker(q, networks):
    # continually poll queue for jobs
    while True:
        process_job(q, networks)
def main(params, filename, num_iter=10000, num_threads=1):
    # network
    G = nx.read_gpickle(filename)
    ## list of returned networks
    networks = []
    # initialise worker queue
    q = Queue()
    # add initial layer of ghsom to queue
    q.put(("00", G, num_iter, params["eta"], params["sigma"], np.inf, params["e_sg"], params["e_en"]))
    if num_threads > 1:
        # initialise threads
        for i in range(num_threads):
            t = Thread(target=worker, args=(q, networks))
            t.setDaemon(True)
            t.start()
        # finally wait until queue is empty and all tasks are done
        q.join()
    else:
        # single thread
        while not q.empty():
            process_job(q, networks)
    print "DONE"
    return G, networks
In [2]:
# params = {'eta': 0.0001,
#           'sigma': 1,
#           'e_sg': 0.7,
#           'e_en': 1.0}
In [3]:
# %prun G, networks = main(params=params, filename="embedded_benchmark.gpickle", num_threads=1, num_iter=1000)
In [4]:
# label_nodes(G, networks[1:])
In [5]:
# NMI_all_layers(G, labels=["firstlevelcommunity"])
In [6]:
# _, network, _ = networks[1]
# colours = np.random.rand(len(network), 3)
In [7]:
# visualise_graph(G=G, colours=colours, layer=1)