In [1]:
%load_ext autoreload
%autoreload 2
from eden.util import configure_logging
import logging
BABELDRAW=False
DEBUG=False
NJOBS=4
if DEBUG: NJOBS=1
configure_logging(logging.getLogger(),verbosity=1+DEBUG)
from IPython.core.display import HTML
HTML('<style>.container { width:95% !important; }</style>')
Out[1]:
In [2]:
%matplotlib inline
In [3]:
# get data
from eden.converter.graph.gspan import gspan_to_eden
from itertools import islice
def get_graphs(dataset_fname='../toolsdata/bursi.pos.gspan', size=100):
    return islice(gspan_to_eden(dataset_fname), size)
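
# quick sanity check (a sketch, assuming gspan_to_eden yields networkx graphs,
# as eden's gspan converter does)
first_graph = next(get_graphs(size=1))
print('nodes: %d  edges: %d' % (first_graph.number_of_nodes(), first_graph.number_of_edges()))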
In [4]:
from graphlearn.utils import draw
import graphlearn.abstract_graphs.minortransform as transform
import graphlearn.abstract_graphs.minordecompose as decompose
from eden.graph import Vectorizer
from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import KMeans
import math

# preparation
v = Vectorizer(complexity=3)
make_decomposer = decompose.make_decomposergen(include_base=False, base_thickness_list=[2])

# Nodes in all graphs get scored.
# The default behaviour is to cluster all scores so that nodes assigned to the
# same cluster can be contracted into one node of the minor graph.
# ShapeCluster takes the lazy route and uses the (rounded-up) node score directly as the cluster id.
class ShapeCluster:
    def fit(self, li):
        pass
    def predict(self, i):
        return [math.ceil(i)]

pp = transform.GraphMinorTransformer(  # core_shape_cluster=KMeans(n_clusters=4),
    core_shape_cluster=ShapeCluster(),
    name_cluster=MiniBatchKMeans(n_clusters=6),
    save_graphclusters=True,
    shape_score_threshold=2.5,
    shape_min_size=2)
pp.set_param(v)

# the magic happens here
decomposers = [make_decomposer(v, x) for x in pp.fit_transform(get_graphs(size=200))]

# let's look at some clusters
if False:
    for cluster_id in pp.graphclusters:
        print('cluster id: %d num: %d' % (cluster_id, len(pp.graphclusters[cluster_id])))
        if cluster_id != -1:
            draw.graphlearn(pp.graphclusters[cluster_id][:7], n_graphs_per_line=7,
                            size=6, vertex_color='_label_', prog='neato', colormap='Set3',
                            contract=False, edge_label='label')

# let's draw what we did there
for i in range(3):
    draw.graphlearn([decomposers[i + 5].pre_vectorizer_graph(nested=True),
                     decomposers[i + 5].base_graph(),
                     decomposers[i + 5].abstract_graph()],
                    size=10,
                    contract=True,
                    abstract_color='red',
                    vertex_label='label', nesting_edge_alpha=0.7)
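
# A tiny illustration of the lazy clustering above (a sketch): ShapeCluster bins a
# node score into a cluster id by rounding up, so nearby scores share a cluster.
# A real clusterer, e.g. KMeans(n_clusters=4), could be passed as core_shape_cluster
# instead, as the commented-out argument above hints.
sc = ShapeCluster()
for score in (1.2, 1.9, 2.4):
    print('score %.1f -> cluster %s' % (score, sc.predict(score)))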
In [5]:
#parameters
radius_list=[0,2]
thickness_list=[2,4]
base_thickness_list=[2]
#extract
cips=decomposers[0].all_core_interface_pairs(thickness_list=[2],radius_list=[0,1],hash_bitmask=2**20-1)
#draw
draw.graphlearn([cips[0][0].graph,cips[0][1].graph], contract=False)
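
# How many core-interface pairs did we extract? A sketch, assuming cips is a nested
# list of CIP objects (as the indexing above suggests):
print('CIP lists: %d, total CIPs: %d' % (len(cips), sum(len(cip_list) for cip_list in cips)))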
In [6]:
%%time
from graphlearn.graphlearn import Sampler as graphlearn_sampler
graphs = get_graphs(size=1000)
sampler = graphlearn_sampler(radius_list=[0, 1],
                             thickness_list=[1],
                             min_cip_count=2,
                             min_interface_count=2,
                             decomposergen=make_decomposer,
                             graphtransformer=transform.GraphMinorTransformer(
                                 core_shape_cluster=ShapeCluster(),
                                 name_cluster=MiniBatchKMeans(n_clusters=6),
                                 save_graphclusters=True))
sampler.fit(graphs, grammar_n_jobs=NJOBS)
print('done')
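
# A quick look at the size of the induced grammar (a sketch, assuming
# sampler.lsgg.productions is a dict keyed by interface hash whose values hold the
# cores available for that interface; the draw call below reads the same attribute):
productions = sampler.lsgg.productions
print('interfaces: %d  productions: %d' % (len(productions),
                                           sum(len(cores) for cores in productions.values())))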
In [ ]:
draw.draw_grammar(sampler.lsgg.productions,n_productions=5,n_graphs_per_production=5,
n_graphs_per_line=5, size=9, contract=False,
colormap='Paired', invert_colormap=False,node_border=1,
vertex_alpha=0.6, edge_alpha=0.5, node_size=450, abstract_interface=True)
In [ ]:
%%time
import graphlearn.utils.draw as draw
import itertools

# parameters
graphs = get_graphs()
id_start = 15
id_end = id_start + 9
graphs = itertools.islice(graphs, id_start, id_end)
n_steps = 50

# sampling with many arguments
graphs = sampler.sample(graphs,
                        n_samples=5,
                        batch_size=1,
                        n_steps=n_steps,
                        n_jobs=1,
                        quick_skip_orig_cip=False,
                        probabilistic_core_choice=True,
                        burnin=0,
                        improving_threshold=0.5,
                        select_cip_max_tries=100,
                        keep_duplicates=True,
                        include_seed=True)

scores = []
ids = range(id_start, id_end)
for i, path_graphs in enumerate(graphs):
    # for each sampling path:
    print('Graph id: %d' % ids[i])
    # collect scores so that we can plot the score histories later
    scores.append(sampler.monitors[i].sampling_info['score_history'])
    # show graphs
    if not BABELDRAW:
        draw.graphlearn(path_graphs,
                        n_graphs_per_line=5, size=10,
                        colormap='Paired', invert_colormap=False, node_border=0.5, vertex_color='_label_',
                        vertex_alpha=0.5, edge_alpha=0.7, node_size=450)
    else:
        from graphlearn.utils import openbabel
        openbabel.draw(path_graphs)
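
# A compact textual summary of the runs above (a sketch; each entry of scores is the
# score history of one sampling path, as collected in the loop):
for i, history in enumerate(scores):
    print('graph %d: first score %.3f, best score %.3f, %d scores recorded'
          % (ids[i], history[0], max(history), len(history)))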
In [ ]:
%matplotlib inline
from itertools import islice
import numpy as np
import matplotlib.pyplot as plt

step = 1
num_graphs_per_plot = 3
num_plots = int(np.ceil(len(scores) / float(num_graphs_per_plot)))
for i in range(num_plots):
    plt.figure(figsize=(10, 5))
    for j, score in enumerate(scores[i * num_graphs_per_plot:i * num_graphs_per_plot + num_graphs_per_plot]):
        data = list(islice(score, None, None, step))
        plt.plot(data, label='graph %d' % (j + i * num_graphs_per_plot + id_start))
    plt.legend(loc='lower right')
    plt.grid()
    plt.ylim(-0.1, 1.1)
    plt.show()