Sampling

We sample from chemical graphs.

First we fit a sampler, then we use it to generate new graphs :)

Initialize logging and configure the notebook.


In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
from eden.util import configure_logging
import logging
# verbosity=1 -> moderate log output from eden/graphlearn on the root logger
configure_logging(logging.getLogger(),verbosity=1)
# record when this notebook was executed
!date
# set to True to render molecules via openbabel instead of the generic graph drawer
BABELDRAW=False

Fit the sampler, i.e. fit the estimator and the graph grammar.


In [ ]:
# data loading
from eden.io.gspan import gspan_to_eden
from itertools import islice

def get_graphs(dataset_fname, size=100):
    """Return an iterator over at most `size` graphs parsed from a gSpan file."""
    graph_source = gspan_to_eden(dataset_fname)
    return islice(graph_source, size)

dataset_fname = '../../toolsdata/bursi.pos.gspan'

In [ ]:
%%time
'''
TRAINING A SAMPLER
'''
# A Sampler bundles a vectorizer, an estimator (scores graphs) and a local
# substitutable graph grammar (proposes local rewrites) into one sampler.
from graphlearn.graphlearn import  Sampler
from eden.graph import Vectorizer
from graphlearn.localsubstitutablegraphgrammar import LocalSubstitutableGraphGrammar

# to demonstrate pluggable regressor
#from graphlearn.estimate import OneClassEstimator
#from sklearn.linear_model import LinearRegression 



# train a sampler
# NOTE(review): the meanings below follow the keyword names; confirm against
# the graphlearn Sampler documentation for the installed version.
sampler=Sampler(
                    
                          #vectorizer=Vectorizer(3),
                          #estimator = OneClassEstimator(classifier=LinearRegression()))
                 # grammar: extract core-interface pairs (CIPs) with these radii
                 # and thicknesses; discard rare CIPs/interfaces via the min counts
                 grammar=LocalSubstitutableGraphGrammar(radius_list=[0, 1],
                                                        thickness_list=[1,2],
                                                        min_cip_count=2,
                                                        min_interface_count=2), 
                 # core-choice strategy; -1/False values disable the corresponding filter
                 size_diff_core_filter=-1,
                 core_choice_byfrequency=False,
                 core_choice_byscore=False,
                 core_choice_bytrial=False,
                 core_choice_bytrial_multiplier=1.0,

                 size_constrained_core_choice=-1,
                 similarity=-1,
                 n_samples=None,
                 proposal_probability=False,
                 batch_size=10,
                 n_jobs=0,

                 orig_cip_max_positives=1,
                 orig_cip_min_positives=0,

                 # accept/reject behaviour of the sampling walk
                 n_steps=50,
                 quick_skip_orig_cip=False,
                 improving_threshold_fraction=-1,
                 improving_linear_start_fraction=0,
                 accept_static_penalty=0.0,
                 accept_min_similarity=0.0,
                 select_cip_max_tries=20,
                 burnin=0,
                 backtrack=0,

                 include_seed=False,
                 keep_duplicates=False,

                 # monitor=True records per-step info, inspected in later cells
                 monitor=True
         
               )

#sampler=Sampler()

# get training graphs
training_graphs = get_graphs(dataset_fname, size=200)
sampler.fit(training_graphs)



# let's look at a few stats about the trained sampler
print('graph grammar stats:')
n_instances, interface_counts, core_counts, cip_counts = sampler.grammar().size()
print('#instances: %d   #interfaces: %d   #cores: %d   #core-interface-pairs: %d' % (n_instances, interface_counts, core_counts, cip_counts))
#sampler.save('tmp/sampler.ge')

Sample and show generated graphs


In [ ]:
%%time
from itertools import islice

"""
USING A SAMPLER
"""
# re-import; not strictly necessary since we should still have the object in memory
from graphlearn.graphlearn import  Sampler

#sampler.load('tmp/sampler.ge')


# get a graph list and pick some graphs to initialize the sampler with.
# the sampler will look at each graph and try n_steps times to alter it.
graphs = get_graphs(dataset_fname, size=100)
id_start=0
id_end=id_start+2
input_graphs = islice(graphs,id_start,id_end)

# sample parameters
# NOTE(review): these two are only read by the plotting cells below; the sampler
# itself was constructed with n_steps=50 and n_samples=None -- confirm whether
# they should instead be passed to transform() (axis limits may be off otherwise).
n_steps=60 # how many steps
n_samples=4 # collect this many samples during the process


# sampler will return lists of graphs as you see below.
graphs = sampler.transform(input_graphs)

In [ ]:
%%time
# plot examples of sampling paths

scores=[]
ids=range(id_start,id_end)
for i,graphlist in enumerate(graphs):
    print 'Graph id: %d'%(ids[i])
    scores.append(sampler.monitors[i].sampling_info['score_history'])
    if BABELDRAW:
        from graphlearn.utils import openbabel
        openbabel.draw(graphlist, d3=False, n_graphs_per_line=6,size=200)
    else:
        from graphlearn.utils import draw
        draw.graphlearn(graphlist,contract=True,#vertex_label='id',
                   n_graphs_per_line=6, size=10, 
                   colormap='Paired', invert_colormap=False, vertex_color='_labels_',
                   vertex_alpha=0.5, edge_alpha=0.2)

In [ ]:
# we sampled with monitoring mode enabled (monitor=True above),
# so we can now inspect exactly what happened during sampling.
# e.g. the line below would show step 9 of the graph at index 1:
#sampler.monitors[1][9]

Show sample score history


In [ ]:
%%time
# plot sampling path score
from itertools import islice
import numpy as np
import pylab as plt
markevery=n_steps/(n_samples)
step=1
num_graphs_per_plot=3
num_plots=np.ceil([len(scores)/num_graphs_per_plot])
for i in range(num_plots):

    plt.figure(figsize=(13,5))
    for j,score in enumerate(scores[i*num_graphs_per_plot:i*num_graphs_per_plot+num_graphs_per_plot]):
     
        data = list(islice(score,None, None, step))
        plt.plot(data, linewidth=2, label='graph %d'%(j+i*num_graphs_per_plot+id_start))
        plt.plot(data, linestyle='None',markevery=markevery, markerfacecolor='white', marker='o', markeredgewidth=2,markersize=6)
    plt.legend(loc='lower right')
    plt.grid()
    plt.xlim(-1,n_steps+1)
    plt.ylim(-0.1,1.1)
    plt.show()