In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
from eden.util import configure_logging
import logging
configure_logging(logging.getLogger(),verbosity=1)
!date
BABELDRAW=False
In [ ]:
# get data
from eden.io.gspan import gspan_to_eden
from itertools import islice
def get_graphs(dataset_fname, size=100):
    return islice(gspan_to_eden(dataset_fname), size)
dataset_fname = '../../toolsdata/bursi.pos.gspan'
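In [ ]:
# (Sketch, not part of the original notebook.) gspan_to_eden yields networkx graphs,
# so we can peek at the first loaded graph to see what the sampler will operate on.
# The 'label' attribute name used below is an assumption about the gspan loader.
first_graph = next(get_graphs(dataset_fname, size=1))
print('nodes: %d  edges: %d' % (first_graph.number_of_nodes(), first_graph.number_of_edges()))
print('some node labels:', [d.get('label') for _, d in list(first_graph.nodes(data=True))[:5]])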
In [ ]:
%%time
'''
TRAINING A SAMPLER
'''
from graphlearn.graphlearn import Sampler
from eden.graph import Vectorizer
from graphlearn.localsubstitutablegraphgrammar import LocalSubstitutableGraphGrammar
# to demonstrate pluggable regressor
#from graphlearn.estimate import OneClassEstimator
#from sklearn.linear_model import LinearRegression
# train a sampler
sampler = Sampler(
    #vectorizer=Vectorizer(3),
    #estimator=OneClassEstimator(classifier=LinearRegression()),
    grammar=LocalSubstitutableGraphGrammar(radius_list=[0, 1],
                                           thickness_list=[1, 2],
                                           min_cip_count=2,
                                           min_interface_count=2),
    size_diff_core_filter=-1,
    core_choice_byfrequency=False,
    core_choice_byscore=False,
    core_choice_bytrial=False,
    core_choice_bytrial_multiplier=1.0,
    size_constrained_core_choice=-1,
    similarity=-1,
    n_samples=None,
    proposal_probability=False,
    batch_size=10,
    n_jobs=0,
    orig_cip_max_positives=1,
    orig_cip_min_positives=0,
    n_steps=50,
    quick_skip_orig_cip=False,
    improving_threshold_fraction=-1,
    improving_linear_start_fraction=0,
    accept_static_penalty=0.0,
    accept_min_similarity=0.0,
    select_cip_max_tries=20,
    burnin=0,
    backtrack=0,
    include_seed=False,
    keep_duplicates=False,
    monitor=True
)
#sampler=Sampler()
# get training graphs
training_graphs = get_graphs(dataset_fname, size=200)
sampler.fit(training_graphs)
# let's look at a few statistics about the trained sampler
print('graph grammar stats:')
n_instances, interface_counts, core_counts, cip_counts = sampler.grammar().size()
print('#instances: %d #interfaces: %d #cores: %d #core-interface-pairs: %d' % (n_instances, interface_counts, core_counts, cip_counts))
#sampler.save('tmp/sampler.ge')
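In [ ]:
# (Sketch.) Persisting the trained sampler so the training step can be skipped later,
# mirroring the commented sampler.save / sampler.load calls used in this notebook.
# The 'tmp/' directory is an assumption and is created here if missing.
import os
if not os.path.exists('tmp'):
    os.makedirs('tmp')
sampler.save('tmp/sampler.ge')
# later, a sampler can be restored with: sampler.load('tmp/sampler.ge')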
In [ ]:
%%time
from itertools import islice
"""
USING A SAMPLER
"""
# reloading is not actually necessary here since we should still have the object in memory
from graphlearn.graphlearn import Sampler
#sampler.load('tmp/sampler.ge')
# get a graph list and pick some graphs to initialize the sampler with.
# the sampler will look at each graph and try n_steps times to alter it.
graphs = get_graphs(dataset_fname, size=100)
id_start=0
id_end=id_start+2
input_graphs = islice(graphs,id_start,id_end)
# sampling parameters (used below when plotting the score histories)
n_steps=60 # how many steps (note: the sampler above was configured with n_steps=50)
n_samples=4 # collect this many samples during the process
# the sampler returns a list of graphs for each input graph, as shown below.
graphs = sampler.transform(input_graphs)
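In [ ]:
# (Sketch.) `graphs` above is typically a lazy generator: the sampling work only happens
# once it is iterated (in the plotting cell below). To work with concrete lists up front
# you could write, e.g.,
#   sample_paths = [list(path) for path in graphs]   # one list of graphs per seed
# but that would consume the generator, so it is left commented out here.
print('object returned by transform():', type(graphs))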
In [ ]:
%%time
# plot examples of sampling paths
scores=[]
ids=range(id_start,id_end)
for i, graphlist in enumerate(graphs):
    print('Graph id: %d' % (ids[i]))
    scores.append(sampler.monitors[i].sampling_info['score_history'])
    if BABELDRAW:
        from graphlearn.utils import openbabel
        openbabel.draw(graphlist, d3=False, n_graphs_per_line=6, size=200)
    else:
        from graphlearn.utils import draw
        draw.graphlearn(graphlist, contract=True,  # vertex_label='id',
                        n_graphs_per_line=6, size=10,
                        colormap='Paired', invert_colormap=False, vertex_color='_labels_',
                        vertex_alpha=0.5, edge_alpha=0.2)
In [ ]:
# we sampled with monitoring enabled,
# which is why we can now inspect exactly what happened during sampling.
# e.g. this is step 9 of the graph with id 1:
#sampler.monitors[1][9]
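In [ ]:
# (Sketch.) A quick textual summary of the same per-step score histories that the next
# cell plots, assuming sampling_info['score_history'] is populated as in the plotting cell above.
for graph_id, mon in zip(ids, sampler.monitors):
    history = mon.sampling_info['score_history']
    if history:
        print('graph id %d: %d scores recorded, start %.3f, final %.3f'
              % (graph_id, len(history), history[0], history[-1]))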
In [ ]:
%%time
# plot sampling path score
from itertools import islice
import numpy as np
import matplotlib.pyplot as plt
markevery = n_steps // n_samples
step = 1
num_graphs_per_plot = 3
num_plots = int(np.ceil(len(scores) / float(num_graphs_per_plot)))
for i in range(num_plots):
    plt.figure(figsize=(13, 5))
    for j, score in enumerate(scores[i*num_graphs_per_plot:(i+1)*num_graphs_per_plot]):
        data = list(islice(score, None, None, step))
        plt.plot(data, linewidth=2, label='graph %d' % (j + i*num_graphs_per_plot + id_start))
        plt.plot(data, linestyle='None', markevery=markevery, markerfacecolor='white',
                 marker='o', markeredgewidth=2, markersize=6)
    plt.legend(loc='lower right')
    plt.grid()
    plt.xlim(-1, n_steps + 1)
    plt.ylim(-0.1, 1.1)
    plt.show()
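In [ ]:
# (Sketch.) Often only the end product of each sampling path is of interest. Since the
# `graphs` generator was already consumed by the plotting cell, this re-runs a short
# transform on two fresh seeds and keeps the last graph of each path; the variable
# names are illustrative, not part of the original notebook.
seed_graphs = islice(get_graphs(dataset_fname, size=10), 2)
final_graphs = []
for path in sampler.transform(seed_graphs):
    path = list(path)
    if path:
        final_graphs.append(path[-1])
print('kept %d final graphs' % len(final_graphs))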