The EDeN library allows the vectorization of graphs, i.e. the transformation of graphs into sparse vectors.
The graphs that can be processed by the EDeN library have the following restrictions:
nodes and edges have identifiers: the following identifiers are used as reserved words
nodes and edges must have the 'label' attribute
the 'label' attribute can be of one of the following types:
strings are used to represent categorical values; dictionaries are used to represent sparse vectors: keys are of string type and values are of type float
In [1]:
%matplotlib inline
import pylab as plt
import networkx as nx
In [2]:
G=nx.Graph()
G.add_node(0, label='A')
G.add_node(1, label='B')
G.add_node(2, label='C')
G.add_edge(0,1, label='x')
G.add_edge(1,2, label='y')
G.add_edge(2,0, label='z')
In [3]:
from eden.util import display
print display.serialize_graph(G)
In [4]:
from eden.util import display
display.draw_graph(G, size=15, node_size=1500, font_size=24, node_border=True, size_x_to_y_ratio=3)
In [5]:
G=nx.Graph()
G.add_node(0, label=[0,0,.1])
G.add_node(1, label=[0,.1,0])
G.add_node(2, label=[.1,0,0])
G.add_edge(0,1, label='x')
G.add_edge(1,2, label='y')
G.add_edge(2,0, label='z')
In [6]:
display.draw_graph(G, size=15, node_size=1500, font_size=24, node_border=True, size_x_to_y_ratio=3)
In [7]:
G=nx.Graph()
G.add_node(0, label={'A':1, 'B':2, 'C':3})
G.add_node(1, label={'A':1, 'B':2, 'D':3})
G.add_node(2, label={'A':1, 'D':2, 'E':3})
G.add_edge(0,1, label='x')
G.add_edge(1,2, label='y')
G.add_edge(2,0, label='z')
In [8]:
display.draw_graph(G, size=15, node_size=1500, font_size=24, node_border=True, size_x_to_y_ratio=3)
In [9]:
G=nx.Graph()
G.add_node(0, label='A')
G.add_node(1, label='B')
G.add_node(2, label='C')
G.add_node(3, label='D')
G.add_node(4, label='E')
G.add_node(5, label='F')
G.add_edge(0,1, label='x')
G.add_edge(0,2, label='y')
G.add_edge(1,3, label='z', nesting=True)
G.add_edge(0,3, label='z', nesting=True)
G.add_edge(2,3, label='z', nesting=True)
G.add_edge(3,4, label='k')
G.add_edge(3,5, label='j')
In [10]:
display.draw_graph(G, size=15, node_size=1500, font_size=24, node_border=True, size_x_to_y_ratio=3, prog='circo')
Build graphs and then display them
In [11]:
import networkx as nx
graph_list = []
In [12]:
G=nx.Graph()
G.add_node(0, label='A', entity='CATEG')
G.add_node(1, label='B', entity='CATEG')
G.add_node(2, label='C', entity='CATEG')
G.add_edge(0,1, label='a', entity='CATEG_EDGE')
G.add_edge(1,2, label='b', entity='CATEG_EDGE')
graph_list += [G.copy()]
G=nx.Graph()
G.add_node(0, label='A', entity='CATEG')
G.add_node(1, label='B', entity='CATEG')
G.add_node(2, label='X', entity='CATEG')
G.add_edge(0,1, label='a', entity='CATEG_EDGE')
G.add_edge(1,2, label='b', entity='CATEG_EDGE')
graph_list += [G.copy()]
G=nx.Graph()
G.add_node(0, label='A', entity='CATEG')
G.add_node(1, label='B', entity='CATEG')
G.add_node(2, label='X', entity='CATEG')
G.add_edge(0,1, label='x', entity='CATEG_EDGE')
G.add_edge(1,2, label='x', entity='CATEG_EDGE')
graph_list += [G.copy()]
G=nx.Graph()
G.add_node(0, label='X', entity='CATEG')
G.add_node(1, label='X', entity='CATEG')
G.add_node(2, label='X', entity='CATEG')
G.add_edge(0,1, label='x', entity='CATEG_EDGE')
G.add_edge(1,2, label='x', entity='CATEG_EDGE')
graph_list += [G.copy()]
In [13]:
G=nx.Graph()
G.add_node(0, label=[1,0,0], entity='VEC')
G.add_node(1, label=[0,1,0], entity='VEC')
G.add_node(2, label=[0,0,1], entity='VEC')
G.add_edge(0,1, label='a', entity='CATEG_EDGE')
G.add_edge(1,2, label='b', entity='CATEG_EDGE')
graph_list += [G.copy()]
G=nx.Graph()
G.add_node(0, label=[1,1,0], entity='VEC')
G.add_node(1, label=[0,1,1], entity='VEC')
G.add_node(2, label=[0,0,1], entity='VEC')
G.add_edge(0,1, label='a', entity='CATEG_EDGE')
G.add_edge(1,2, label='b', entity='CATEG_EDGE')
graph_list += [G.copy()]
G=nx.Graph()
G.add_node(0, label=[1,0.1,0.2], entity='VEC')
G.add_node(1, label=[0.3,1,0.4], entity='VEC')
G.add_node(2, label=[0.5,0.6,1], entity='VEC')
G.add_edge(0,1, label='a', entity='CATEG_EDGE')
G.add_edge(1,2, label='b', entity='CATEG_EDGE')
graph_list += [G.copy()]
G=nx.Graph()
G.add_node(0, label=[0.1,0.2,0.3], entity='VEC')
G.add_node(1, label=[0.4,0.5,0.6], entity='VEC')
G.add_node(2, label=[0.7,0.8,0.9], entity='VEC')
G.add_edge(0,1, label='a', entity='CATEG_EDGE')
G.add_edge(1,2, label='b', entity='CATEG_EDGE')
graph_list += [G.copy()]
In [14]:
G=nx.Graph()
G.add_node(0, label={'A':1, 'B':1, 'C':1}, entity='SPVEC')
G.add_node(1, label={'a':1, 'B':1, 'C':1}, entity='SPVEC')
G.add_node(2, label={'a':1, 'b':1, 'C':1}, entity='SPVEC')
G.add_edge(0,1, label='a', entity='CATEG_EDGE')
G.add_edge(1,2, label='b', entity='CATEG_EDGE')
graph_list += [G.copy()]
G=nx.Graph()
G.add_node(0, label={'A':1, 'C':1, 'D':1}, entity='SPVEC')
G.add_node(1, label={'a':1, 'C':1, 'D':1}, entity='SPVEC')
G.add_node(2, label={'a':1, 'C':1, 'D':1}, entity='SPVEC')
G.add_edge(0,1, label='a', entity='CATEG_EDGE')
G.add_edge(1,2, label='b', entity='CATEG_EDGE')
graph_list += [G.copy()]
G=nx.Graph()
G.add_node(0, label={'A':1, 'D':1, 'E':1}, entity='SPVEC')
G.add_node(1, label={'a':1, 'D':1, 'E':1}, entity='SPVEC')
G.add_node(2, label={'a':1, 'D':1, 'E':1}, entity='SPVEC')
G.add_edge(0,1, label='a', entity='CATEG_EDGE')
G.add_edge(1,2, label='b', entity='CATEG_EDGE')
graph_list += [G.copy()]
G=nx.Graph()
G.add_node(0, label={'A':1, 'B':1, 'C':1, 'D':1, 'E':1}, entity='SPVEC')
G.add_node(1, label={'a':1, 'B':1, 'C':1, 'D':1, 'E':1}, entity='SPVEC')
G.add_node(2, label={'a':1, 'b':1, 'C':1, 'D':1, 'E':1}, entity='SPVEC')
G.add_edge(0,1, label='a', entity='CATEG_EDGE')
G.add_edge(1,2, label='b', entity='CATEG_EDGE')
graph_list += [G.copy()]
In [15]:
from eden.util import display
for g in graph_list:
display.draw_graph(g, size=5, node_size=800, node_border=1, layout='shell', secondary_vertex_label = 'entity')
Create a vector representation
In [17]:
%%time
from eden.graph import Vectorizer
vectorizer = Vectorizer(complexity=2, n=4 )
vectorizer.fit(graph_list)
from itertools import islice
X = vectorizer.transform(islice(graph_list,0,12))
print 'Instances: %d \nFeatures: %d with an avg of %d features per instance' % (X.shape[0], X.shape[1], X.getnnz()/X.shape[0])
In [18]:
from sklearn.svm import OneClassSVM
import numpy as np
def plot2D(X_reduced,labels):
size=11
plt.figure(figsize=(size,size))
#make mesh
x_min, x_max = X_reduced[:, 0].min(), X_reduced[:, 0].max()
y_min, y_max = X_reduced[:, 1].min(), X_reduced[:, 1].max()
step_num = 100
h = min( ( x_max - x_min ) / step_num , ( y_max - y_min ) / step_num )# step size in the mesh
b = h * 50 # border size
x_min, x_max = X_reduced[:, 0].min() - b, X_reduced[:, 0].max() + b
y_min, y_max = X_reduced[:, 1].min() - b, X_reduced[:, 1].max() + b
xx, yy = np.meshgrid( np.arange( x_min, x_max, h ), np.arange( y_min, y_max, h ) )
#induce a predictive model
clf = OneClassSVM( gamma = 10**3, nu = 0.01 )
clf.fit( X_reduced )
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, m_max] . [y_min, y_max].
if hasattr(clf, "decision_function"):
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
else:
Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
# Put the result into a color plot
levels = np.linspace(min(Z), max(Z), 40)
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.get_cmap('YlOrRd'), alpha=.9,levels=levels)
plt.scatter(X_reduced[:, 0], X_reduced[:, 1],
alpha=.5,
s=70,
edgecolors='none',
c = 'white',
cmap = plt.get_cmap('YlOrRd'))
#labels
for id in range( X_reduced.shape[0] ):
label = labels[id]
x = X_reduced[id, 0]
y = X_reduced[id, 1]
plt.annotate(label,xy = (x,y), xytext = (-12, -3), textcoords = 'offset points')
plt.show()
In [19]:
%%time
import numpy as np
#make dense feature representation
n_components = max(2, X.shape[0])
from sklearn.kernel_approximation import Nystroem
feature_map_nystroem = Nystroem(gamma=0.1, n_components=n_components)
X_explicit=feature_map_nystroem.fit_transform(X)
# Visualize result using PCA
from sklearn.decomposition import TruncatedSVD
pca = TruncatedSVD(n_components=2)
X_reduced = pca.fit_transform(X_explicit)
2D plot using OneClass classifier to identify density curves
In [20]:
plot2D(X_reduced,range(X_reduced.shape[0]))
Compute pairwise similarity matrix
In [21]:
from ipy_table import *
def prep_table(K):
header = [' ']
header += [i for i in range(K.shape[0])]
mat = [header]
for id, row in enumerate(K):
new_row = [id]
new_row += list(row)
mat.append(new_row)
return mat
In [22]:
from sklearn import metrics
K=metrics.pairwise.pairwise_kernels(X, metric='linear')
mat=prep_table(K)
make_table(mat)
apply_theme('basic')
set_global_style(float_format = '%0.2f')
Out[22]: