The EDeN library allows the vectorization of graphs, i.e. the transformation of graphs into sparse vectors.
The graphs that can be processed by the EDeN library have the following restrictions:
nodes and edges have identifiers: the following identifiers are used as reserved words
nodes and edges must have the 'label' attribute
the 'label' attribute can be of one of the following types:
strings, which represent categorical values; lists of numbers, which represent dense vectors (as in the list-labelled examples below); and dictionaries, which represent sparse vectors (keys are of string type and values are of type float)
In [1]:
%matplotlib inline
import pylab as plt
import networkx as nx
In [2]:
# Three-node cycle whose nodes and edges all carry plain string labels.
G = nx.Graph()
for node_id, node_label in enumerate(['A', 'B', 'C']):
    G.add_node(node_id, label=node_label)
for source, target, edge_label in [(0, 1, 'x'), (1, 2, 'y'), (2, 0, 'z')]:
    G.add_edge(source, target, label=edge_label)
In [3]:
from eden.util import display
# Print the value returned by display.serialize_graph for the graph above.
# A single parenthesized argument prints identically on Python 2 and is
# also valid on Python 3, where the bare print statement is a SyntaxError.
print(display.serialize_graph(G))
In [4]:
from eden.util import display
# Draw the string-labelled triangle; one keyword per line so that the
# individual drawing options are easy to tweak.
display.draw_graph(
    G,
    size=15,
    node_size=1500,
    font_size=24,
    node_border=True,
    size_x_to_y_ratio=3,
)
In [5]:
# Same three-node cycle, but every node label is a 3-component list
# instead of a string; edge labels stay categorical.
G = nx.Graph()
vector_labels = [[0, 0, .1], [0, .1, 0], [.1, 0, 0]]
for node_id, vector in enumerate(vector_labels):
    G.add_node(node_id, label=vector)
for source, target, edge_label in [(0, 1, 'x'), (1, 2, 'y'), (2, 0, 'z')]:
    G.add_edge(source, target, label=edge_label)
In [6]:
# Draw the list-labelled triangle with the same drawing options as before.
display.draw_graph(
    G,
    size=15,
    node_size=1500,
    font_size=24,
    node_border=True,
    size_x_to_y_ratio=3,
)
In [7]:
# Same three-node cycle, with a dictionary (string key -> number) as each
# node label; edge labels stay categorical.
G = nx.Graph()
sparse_labels = [
    {'A':1, 'B':2, 'C':3},
    {'A':1, 'B':2, 'D':3},
    {'A':1, 'D':2, 'E':3},
]
for node_id, sparse_label in enumerate(sparse_labels):
    G.add_node(node_id, label=sparse_label)
for source, target, edge_label in [(0, 1, 'x'), (1, 2, 'y'), (2, 0, 'z')]:
    G.add_edge(source, target, label=edge_label)
In [8]:
# Draw the dictionary-labelled triangle with the same drawing options.
display.draw_graph(
    G,
    size=15,
    node_size=1500,
    font_size=24,
    node_border=True,
    size_x_to_y_ratio=3,
)
In [9]:
# Six string-labelled nodes; the three 'z' edges into node 3 are flagged
# with nesting=True and carry distinct weights, the other edges are plain.
G = nx.Graph()
for node_id, node_label in enumerate('ABCDEF'):
    G.add_node(node_id, label=node_label)

# Edges are listed in the original insertion order.
edge_specs = [
    (0, 1, {'label': 'x'}),
    (0, 2, {'label': 'y'}),
    (1, 3, {'label': 'z', 'nesting': True, 'weight': .5}),
    (0, 3, {'label': 'z', 'nesting': True, 'weight': .1}),
    (2, 3, {'label': 'z', 'nesting': True, 'weight': .01}),
    (3, 4, {'label': 'k'}),
    (3, 5, {'label': 'j'}),
]
for source, target, attributes in edge_specs:
    G.add_edge(source, target, **attributes)
In [10]:
# Draw the nested-edge graph; identical options to the earlier drawings
# plus prog='circo'.
display.draw_graph(
    G,
    size=15,
    node_size=1500,
    font_size=24,
    node_border=True,
    size_x_to_y_ratio=3,
    prog='circo',
)
In [11]:
# Vectorize the single graph G (the weighted nested-edge graph from the
# previous cells) and show both a summary and the raw result.
from eden.graph import Vectorizer
from eden.util import describe

# NOTE(review): the positional 2 is presumably the `complexity` argument
# used by keyword later in this notebook -- confirm against eden.graph.
X = Vectorizer(2).transform_single(G)

# print(...) with one parenthesized argument behaves the same on Python 2
# and, unlike the bare print statement, also parses on Python 3.
print(describe(X))
print(X)
In [12]:
# Rebuild the six-node nested-edge graph WITHOUT the per-edge `weight`
# attribute, then vectorize it so the output can be compared with the
# weighted version.
G = nx.Graph()
G.add_node(0, label='A')
G.add_node(1, label='B')
G.add_node(2, label='C')
G.add_node(3, label='D')
G.add_node(4, label='E')
G.add_node(5, label='F')
G.add_edge(0, 1, label='x')
G.add_edge(0, 2, label='y')
G.add_edge(1, 3, label='z', nesting=True)
G.add_edge(0, 3, label='z', nesting=True)
G.add_edge(2, 3, label='z', nesting=True)
G.add_edge(3, 4, label='k')
G.add_edge(3, 5, label='j')

from eden.graph import Vectorizer
from eden.util import describe

X = Vectorizer(2).transform_single(G)

# print(...) with one parenthesized argument behaves the same on Python 2
# and, unlike the bare print statement, also parses on Python 3.
print(describe(X))
print(X)
Build graphs and then display them
In [13]:
import networkx as nx
# Accumulator for the example graphs; the following cells append a copy of
# each graph they build.
graph_list = []
In [14]:
# Four 3-node path graphs (0 - 1 - 2) with string node labels tagged
# entity='CATEG' and string edge labels tagged entity='CATEG_EDGE'.
# Each entry is (labels of nodes 0..2, labels of edges 0-1 and 1-2).
categorical_specs = [
    (('A', 'B', 'C'), ('a', 'b')),
    (('A', 'B', 'X'), ('a', 'b')),
    (('A', 'B', 'X'), ('x', 'x')),
    (('X', 'X', 'X'), ('x', 'x')),
]
for node_labels, edge_labels in categorical_specs:
    G = nx.Graph()
    for node_id, node_label in enumerate(node_labels):
        G.add_node(node_id, label=node_label, entity='CATEG')
    for source, edge_label in enumerate(edge_labels):
        G.add_edge(source, source + 1, label=edge_label, entity='CATEG_EDGE')
    graph_list.append(G.copy())
In [15]:
# Four 3-node path graphs (0 - 1 - 2) whose node labels are 3-component
# lists tagged entity='VEC'; the edges are always labelled 'a' then 'b'.
dense_label_sets = [
    ([1, 0, 0], [0, 1, 0], [0, 0, 1]),
    ([1, 1, 0], [0, 1, 1], [0, 0, 1]),
    ([1, 0.1, 0.2], [0.3, 1, 0.4], [0.5, 0.6, 1]),
    ([0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]),
]
for node_vectors in dense_label_sets:
    G = nx.Graph()
    for node_id, vector in enumerate(node_vectors):
        G.add_node(node_id, label=vector, entity='VEC')
    for source, edge_label in enumerate(('a', 'b')):
        G.add_edge(source, source + 1, label=edge_label, entity='CATEG_EDGE')
    graph_list.append(G.copy())
In [16]:
# Four 3-node path graphs (0 - 1 - 2) whose node labels are dictionaries
# tagged entity='SPVEC'; the edges are always labelled 'a' then 'b'.
sparse_label_sets = [
    (
        {'A':1, 'B':1, 'C':1},
        {'a':1, 'B':1, 'C':1},
        {'a':1, 'b':1, 'C':1},
    ),
    (
        {'A':1, 'C':1, 'D':1},
        {'a':1, 'C':1, 'D':1},
        {'a':1, 'C':1, 'D':1},
    ),
    (
        {'A':1, 'D':1, 'E':1},
        {'a':1, 'D':1, 'E':1},
        {'a':1, 'D':1, 'E':1},
    ),
    (
        {'A':1, 'B':1, 'C':1, 'D':1, 'E':1},
        {'a':1, 'B':1, 'C':1, 'D':1, 'E':1},
        {'a':1, 'b':1, 'C':1, 'D':1, 'E':1},
    ),
]
for node_dicts in sparse_label_sets:
    G = nx.Graph()
    for node_id, sparse_label in enumerate(node_dicts):
        G.add_node(node_id, label=sparse_label, entity='SPVEC')
    for source, edge_label in enumerate(('a', 'b')):
        G.add_edge(source, source + 1, label=edge_label, entity='CATEG_EDGE')
    graph_list.append(G.copy())
In [17]:
from eden.util import display
# Draw every collected graph in turn, showing each node's 'entity'
# attribute as a secondary label. The call must be indented under the
# `for`; without the indent the loop has no body and does not parse.
for g in graph_list:
    display.draw_graph(
        g,
        size=5,
        node_size=800,
        node_border=1,
        layout='shell',
        secondary_vertex_label='entity',
    )
Create a vector representation
In [18]:
%%time
from eden.graph import Vectorizer
# Fit the vectorizer on all example graphs and turn them into the rows of X.
vectorizer = Vectorizer(complexity=2, n=4)
vectorizer.fit(graph_list)
X = vectorizer.transform(graph_list)

# One class id per group of four graphs, in the order the groups were
# appended to graph_list (CATEG, VEC, SPVEC node labels).
y = [1] * 4 + [2] * 4 + [3] * 4

# Floor division keeps the average an integer on Python 3 as well; the
# parenthesized single-argument print is valid on both Python 2 and 3.
print('Instances: %d \nFeatures: %d with an avg of %d features per instance' % (X.shape[0], X.shape[1], X.getnnz() // X.shape[0]))
In [23]:
# Options handed unchanged to both eden.embedding helpers below.
opts = {'knn': 3, 'metric': 'rbf', 'k_threshold': 0.7, 'gamma': 1e-2}
from eden.embedding import display_embedding, embedding_quality

# Report the embedding quality score together with the data shape and the
# number of distinct classes. The parenthesized single-argument print is
# valid on both Python 2 and Python 3.
print('Embedding quality [adjusted Rand index]: %.2f data: %s #classes: %d' % (embedding_quality(X, y, opts), X.shape, len(set(y))))
display_embedding(X, y, opts)
Compute pairwise similarity matrix
In [20]:
from ipy_table import *
def prep_table(K):
    """Lay out a 2-D matrix as a list of rows with index labels.

    The first row is a header: a blank corner cell followed by one index
    per column of ``K``. Every following row starts with its own row index
    and continues with the values of that row.

    K: 2-D array-like exposing ``shape`` and iteration over its rows
       (e.g. a NumPy array).
    Returns a list of lists suitable for a table renderer.
    """
    # Label the columns by the column count so a non-square matrix gets a
    # header of the right width (identical to the row count when square).
    header = [' '] + list(range(K.shape[1]))
    table = [header]
    # `row_index` rather than `id`, to avoid shadowing the builtin.
    for row_index, row in enumerate(K):
        table.append([row_index] + list(row))
    return table
In [21]:
# Build the pairwise similarity matrix of the vectorized graphs and render
# it as a formatted table.
from sklearn import metrics
# Linear kernel between every pair of rows of X.
K=metrics.pairwise.pairwise_kernels(X, metric='linear')
# Add the header row and the leading index column.
mat=prep_table(K)
# ipy_table calls (names come from the star import of ipy_table); keep this
# order: the table is created first and then styled.
# NOTE(review): presumably the value of the last expression is what the
# notebook renders as the cell output -- confirm.
make_table(mat)
apply_theme('basic')
set_global_style(float_format = '%0.2f')
Out[21]:
In [ ]: