Consider a binary classification problem. We will fit a predictor and use it to assign a weight score to each node in each instance; this operation is referred to as "annotation". For illustration purposes we will display a few annotated graphs. We will see that building a predictor on the annotated instances can increase the predictive performance.
Load the data and convert it to graphs.
In [1]:
    
# Imports needed by this cell.
from eden.converter.graph.gspan import gspan_to_eden
from eden.util import random_bipartition_iter

# Input files, one per class (positive / negative instances).
pos = 'bursi.pos.gspan'
neg = 'bursi.neg.gspan'

# Turn each file into an iterable of graphs.
iterable_pos = gspan_to_eden(pos)
iterable_neg = gspan_to_eden(neg)

# Split each class into a train part and a test part.
# NOTE(review): relative_size is presumably the fraction assigned to the
# first (train) part -- confirm against eden.util.
train_test_split = 0.9
iterable_pos_train, iterable_pos_test = random_bipartition_iter(
    iterable_pos, relative_size=train_test_split)
iterable_neg_train, iterable_neg_test = random_bipartition_iter(
    iterable_neg, relative_size=train_test_split)
    
Set up the vectorizer.
In [12]:
    
from eden.graph import Vectorizer

# Single vectorizer instance shared by the later cells (fit, estimate, annotate).
vectorizer = Vectorizer(complexity=2)
    
In [13]:
    
%%time
from itertools import tee
iterable_pos_train,iterable_pos_train_=tee(iterable_pos_train)
iterable_neg_train,iterable_neg_train_=tee(iterable_neg_train)
iterable_pos_test,iterable_pos_test_=tee(iterable_pos_test)
iterable_neg_test,iterable_neg_test_=tee(iterable_neg_test)
from eden.util import fit,estimate
estimator = fit(iterable_pos_train_, iterable_neg_train_, vectorizer, n_iter_search=5)
estimate(iterable_pos_test_, iterable_neg_test_, estimator, vectorizer)
    
    
Annotate the instances and iterate over the resulting graphs.
Display the first few graphs as examples. Color the vertices using the annotated 'importance' attribute.
In [14]:
    
# Show the built-in documentation of the annotate method that the following cells call.
help(vectorizer.annotate)
    
    
In [15]:
    
%matplotlib inline
from itertools import tee
iterable_pos_train,iterable_pos_train_=tee(iterable_pos_train)
graphs = vectorizer.annotate( iterable_pos_train_, estimator=estimator )
import itertools 
graphs = itertools.islice( graphs, 3 )
from eden.util.display import draw_graph
for graph in graphs: draw_graph( graph, vertex_color='importance', size=10 )
    
    
    
    
In [16]:
    
%matplotlib inline
from itertools import tee
iterable_pos_train,iterable_pos_train_=tee(iterable_pos_train)
graphs = vectorizer.annotate( iterable_pos_train_, estimator=estimator )
from eden.modifier.graph.vertex_attributes import colorize_binary
graphs = colorize_binary(graph_list = graphs, output_attribute = 'color_value', input_attribute='importance', level=0)
import itertools 
graphs = itertools.islice( graphs, 3 )
from eden.util.display import draw_graph
for graph in graphs: draw_graph( graph, vertex_color='color_value', size=10 )
    
    
    
    
Create a data matrix, this time using the annotated graphs. Note that the graphs are now weighted.
Evaluate the predictive performance on the weighted graphs.
In [17]:
    
%%time
a_estimator=estimator
num_iterations = 3
reweight = 0.6
for i in range(num_iterations):
    print 'Iteration %d'%i
    
    from itertools import tee
    iterable_pos_train_=vectorizer.annotate( iterable_pos_train, estimator=a_estimator, reweight=reweight )
    iterable_neg_train_=vectorizer.annotate( iterable_neg_train, estimator=a_estimator, reweight=reweight )
    iterable_pos_test_=vectorizer.annotate( iterable_pos_test, estimator=a_estimator, reweight=reweight )
    iterable_neg_test_=vectorizer.annotate( iterable_neg_test, estimator=a_estimator, reweight=reweight )
    
    iterable_pos_train,iterable_pos_train_=tee(iterable_pos_train_)
    iterable_neg_train,iterable_neg_train_=tee(iterable_neg_train_)
    iterable_pos_test,iterable_pos_test_=tee(iterable_pos_test_)
    iterable_neg_test,iterable_neg_test_=tee(iterable_neg_test_)
    from eden.util import fit,estimate
    a_estimator = fit(iterable_pos_train_, iterable_neg_train_, vectorizer)
    estimate(iterable_pos_test_, iterable_neg_test_, a_estimator, vectorizer)