Sampling RNA: using secondary structure elements to induce a graph minor

initialize


In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
from eden.util import configure_logging
import logging

DEBUG = False
NJOBS = 4
if DEBUG:
    NJOBS = 1
configure_logging(logging.getLogger(), verbosity=int(DEBUG))

GET RNA DATA


In [2]:
#from eden.converter.fasta import fasta_to_sequence
from eden_rna.io.fasta import load
import itertools

def rfam_uri(family_id):
    # remote Rfam alignment (unaligned fasta) for a given family
    return 'http://rfam.xfam.org/family/%s/alignment?acc=%s&format=fastau&download=0' % (family_id, family_id)

def rfam_uri(family_id):
    # overrides the remote version above: read from a local fasta file instead
    return '%s.fa' % family_id

def get_sequences(size=9999, withoutnames=False):
    sequences = itertools.islice(load("../../toolsdata/RF00005.fa"), size)
    if withoutnames:
        # drop the fasta headers and return only the sequence strings
        return [seq for (name, seq) in sequences]
    return sequences
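
A quick sanity check can help here (a sketch; it assumes, like the loader above, that ../../toolsdata/RF00005.fa is present and that load yields (header, sequence) pairs, matching the unpacking in get_sequences):


In [ ]:
# peek at the first record without consuming the whole file
first_name, first_seq = next(iter(get_sequences(size=1)))
print first_name
print first_seq[:60]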

TESTING EXTRACTION AND GRAPH MANAGER


In [3]:
'''
# NOTE: this demo cell is intentionally disabled (the whole body is a string literal).
# It shows how the forgi-based transformer and the RnaDecomposer expose the graph-manager view.
from graphlearn.utils import draw
import graphlearn.minor.rna as rna

from graphlearn.graphlearn import Sampler as GLS
from eden.graph import Vectorizer
vectorizer = Vectorizer()

make_decomposer = lambda vec, graph: rna.rnadecomposer.RnaDecomposer(vec, graph,
                                                                     include_base=False,
                                                                     base_thickness_list=[2])

pp = rna.forgitransform.GraphTransformerForgi()
pp.fit(get_sequences(), vectorizer)

graphmanagers = [make_decomposer(vectorizer, x)
                 for x in pp.transform(get_sequences(withoutnames=True)[:20])]

print 'DEMONSTRATING GRAPH MANAGER'

for i in [4]:
    print 'graph manager example %d' % i
    gm = graphmanagers[i]
    draw.graphlearn([gm.pre_vectorizer_graph(nested=True), gm.abstract_graph(), gm.base_graph()],
                    size=15, vertex_label='label', contract=False)
    draw.graphlearn([gm.base_graph()], size=15, vertex_label='label', contract=True)

    print gm.sequence
    print gm.structure
'''



creating grammar and estimator


In [7]:
%%time

from graphlearn.feasibility import FeasibilityChecker as Checker
import graphlearn.estimate as estimate

#from graphlearn.minor.decompose import MinorDecomposer

# a feasibility checker is not really needed, since after refolding we get a valid RNA
#feasibility = Checker()
#feasibility.checklist.append(rna.is_rna)
graphs = get_sequences(size=100)

estimator = estimate.OneClassEstimator(nu=.33, cv=2, n_jobs=-1)

from graphlearn.minor.rna.rnadecomposer import RnaDecomposer
import graphlearn.minor.rna.infernal as infernal
from graphlearn.minor.rna import forgitransform as forgitransform
from graphlearn.minor.rna import rnatransform as rnatrans

sampler = infernal.AbstractSampler(
    #radius_list=[0, 1],
    #thickness_list=[2],
    #min_cip_count=1,
    #min_interface_count=2,
    # we need a learned transformer here:
    graphtransformer=rnatrans.learnedRnaTransformer(),  # alternative: forgitransform.GraphTransformerForgi(fold_only=True)
    decomposer=rnatrans.learnedRnaDedomposer(),         # alternative: RnaDecomposer(output_sequence=True, pre_vectorizer_rm_f=True)
    #estimator=estimator,
    #feasibility_checker=feasibility,
    include_seed=False
)

sampler.fit(graphs)
graphs = get_sequences(size=5, withoutnames=True)
r = list(sampler.transform(graphs))


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-7-1e25c6933359> in <module>()
----> 1 get_ipython().run_cell_magic(u'time', u'', u'\nfrom  graphlearn.feasibility import FeasibilityChecker as Checker\nimport graphlearn.estimate as estimate\n\n#from graphlearn.minor.decompose import MinorDecomposer\n\n# not really needed since after refolding we get an RNA\n#feasibility=Checker()\n#feasibility.checklist.append(rna.is_rna)\ngraphs = get_sequences(size=100)\n\nestimator=estimate.OneClassEstimator( nu=.33, cv=2, n_jobs=-1)\n\nfrom graphlearn.minor.rna.rnadecomposer import RnaDecomposer\nimport graphlearn.minor.rna.infernal as infernal\nfrom graphlearn.minor.rna import forgitransform as forgitransform\nfrom graphlearn.minor.rna import rnatransform as rnatrans\nsampler=infernal.AbstractSampler(\n                            #radius_list=[0,1],\n                            #thickness_list=[2],\n                            #min_cip_count=1,\n                            #min_interface_count=2,\n                            #################################  we need a learned transformer... \n                            graphtransformer=rnatrans.learnedRnaTransformer(),#forgitransform.GraphTransformerForgi(fold_only=True), \n                            decomposer=rnatrans.learnedRnaDedomposer(),#RnaDecomposer(output_sequence=True,pre_vectorizer_rm_f=True),\n                            #estimator=estimator\n                            #feasibility_checker=feasibility\n                            include_seed=False\n                           )\n\nsampler.fit(graphs)\ngraphs = get_sequences(size=5,withoutnames=True)\nr= list( sampler.transform(graphs))')

/home/ikea/.local/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_cell_magic(self, magic_name, line, cell)
   2113             magic_arg_s = self.var_expand(line, stack_depth)
   2114             with self.builtin_trap:
-> 2115                 result = fn(magic_arg_s, cell)
   2116             return result
   2117 

<decorator-gen-59> in time(self, line, cell, local_ns)

/home/ikea/.local/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
    186     # but it's overkill for just that one bit of state.
    187     def magic_deco(arg):
--> 188         call = lambda f, *a, **k: f(*a, **k)
    189 
    190         if callable(arg):

/home/ikea/.local/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in time(self, line, cell, local_ns)
   1183         else:
   1184             st = clock2()
-> 1185             exec(code, glob, local_ns)
   1186             end = clock2()
   1187             out = None

<timed exec> in <module>()

/home/ikea/GRAPHLEARN/GraphLearn/graphlearn/graphlearn.pyc in fit(self, graphs)
    422     def fit(self, graphs):
    423         decomposers = [self.decomposer.make_new_decomposer(data)
--> 424                        for data in self.graph_transformer.fit_transform(graphs)]
    425         self.fit_grammar(decomposers)
    426         self.fit_estimator(decomposers)

/home/ikea/GRAPHLEARN/GraphLearn/graphlearn/learnedlayer/transform.pyc in fit_transform(self, inputs, inputs_neg)
    127 
    128     def fit_transform(self, inputs, inputs_neg=[]):
--> 129         return self.fit(inputs, inputs_neg, fit_transform=True)
    130 
    131     def re_transform_single(self, graph):

TypeError: fit() takes exactly 2 arguments (4 given)
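
To narrow down this TypeError it can help to inspect the signature of the transformer actually used (a purely diagnostic sketch; it only relies on names imported in the failing cell above):


In [ ]:
import inspect
# fit() apparently does not accept the (inputs, inputs_neg, fit_transform=True)
# call made by fit_transform; print its actual argument list
print inspect.getargspec(rnatrans.learnedRnaTransformer().fit)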

In [ ]:
# draw production rules
from graphlearn.utils import draw
draw.draw_grammar(sampler.lsgg.productions, n_productions=5, n_graphs_per_production=5,
                  n_graphs_per_line=6, size=10, contract=False,
                  colormap='Paired', invert_colormap=False,
                  vertex_alpha=0.6, edge_alpha=0.5, abstract_interface=True)
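
If drawing is slow, a quicker sanity check (a sketch; it assumes sampler.lsgg.productions supports len(), e.g. a mapping keyed by interface) is to look at the grammar size:


In [ ]:
# number of learned interfaces in the grammar (assumption: productions is a mapping)
print 'interfaces in grammar:', len(sampler.lsgg.productions)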

Sample


In [ ]:
%%time
import graphlearn.utils.draw as draw
import itertools

# parameters and data
sequences = get_sequences(withoutnames=True)
id_start = 66
id_end = id_start + 3
sequences = sequences[id_start:id_end]
n_steps = 50

#from eden_extra.modifier.graph.vertex_attributes import colorize

scores = []
sequences = []   # reused: now collects the sampler notes per result
ids = range(id_start, id_end)

for i, graphlist in enumerate(r):
    #print 'Graph id: %d' % (ids[i])
    scores.append(sampler.monitors[i].sampling_info['score_history'])
    sequences.append(sampler.monitors[i].sampling_info['notes'])
    #path_graphs = colorize(graphlist,
    #                       output_attribute='color_level',
    #                       labels=['A', 'U', 'G', 'C'])
    print 'going to draw'
    print graphlist
    if False:
        draw.graphlearn(graphlist,
                        n_graphs_per_line=3, size=20, contract=True,
                        colormap='Paired', invert_colormap=False, vertex_color='color_level',
                        vertex_alpha=0.5, edge_alpha=0.7, edge_label='label', layout="RNA")
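
For reference, the raw samples can also be printed directly (a sketch; it assumes, as in the plotting cell below, that each monitor's 'notes' entry is a newline-separated string of sampled sequences, possibly with trailing messages):


In [ ]:
for i, notes in enumerate(sequences):
    samples = notes.split('\n')
    print 'result %d: %d sampled sequences, last one:' % (i, len(samples))
    print samples[-1]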

draw the score history for each of the graphs


In [ ]:
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']

%matplotlib inline
from itertools import islice
import matplotlib.pyplot as plt
import numpy as np

step = 1
num_graphs_per_plot = 2
num_plots = int(np.ceil(len(scores) / float(num_graphs_per_plot)))
for i in range(num_plots):
    plt.figure(figsize=(10, 5))
    for j, score in enumerate(scores[i * num_graphs_per_plot:(i + 1) * num_graphs_per_plot]):
        data = list(islice(score, None, None, step))
        plt.plot(data, ls='-', color=colors[j], label='graph %d' % (j + i * num_graphs_per_plot + id_start))

        # now add the infernal evaluation of the sampled sequences
        seqs = sequences[i * num_graphs_per_plot + j]
        seqs = seqs.split('\n')

        # SEQUENCES STILL CONTAIN F, ALSO THERE ARE ERROR MESSAGES AT THE END OF THE INFO
        #seqs = seqs[:-1]

        #print seqs
        data2 = infernal.infernal_checker(seqs,
                                          cmfile='../../toolsdata/rf00005.cm',
                                          cmsearchbinarypath='../../toolsdata/cmsearch')
        #print data2, seqs
        plt.plot(data2, ls='--', color=colors[j], label='infernal %d' % (j + i * num_graphs_per_plot + id_start))
    plt.plot([0.29] * len(data2), ls='-.', color='r', label='significance')  # linestyles: '-', '--', '-.', ':'
    plt.legend(loc='lower left', framealpha=0.5)
    plt.grid()
    plt.ylim(-0.1, 1.1)
    plt.show()
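
As a small follow-up (a sketch reusing data2 from the last evaluated graph and the 0.29 cutoff drawn above), the fraction of sampled sequences clearing the infernal significance threshold can be summarised:


In [ ]:
# count how many infernal scores from the last evaluated graph exceed the cutoff
significant = [s for s in data2 if s > 0.29]
print '%d of %d sampled sequences above the significance threshold' % (len(significant), len(data2))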

In [ ]:
print sequences

In [ ]:
import sys
sys.path

In [ ]:
print r
