In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
from eden.util import configure_logging
import logging
DEBUG=False
NJOBS=4
if DEBUG: NJOBS=1
configure_logging(logging.getLogger(),verbosity=+DEBUG)
In [2]:
#from eden.converter.fasta import fasta_to_sequence
from eden_rna.io.fasta import load
import itertools
def rfam_uri(family_id):
return 'http://rfam.xfam.org/family/%s/alignment?acc=%s&format=fastau&download=0'%(family_id,family_id)
def rfam_uri(family_id):
return '%s.fa'%(family_id)
def get_sequences(size=9999,withoutnames=False):
sequences = itertools.islice( load("../../toolsdata/RF00005.fa"), size)
if withoutnames:
return [ b for (a,b) in sequences ]
return sequences
In [3]:
'''
from graphlearn.utils import draw
import graphlearn.minor.rna as rna
#forgitransform as forgitransform
#import graphlearn.abstract_graphs.rna.rnadecomposer as decompose
from graphlearn.graphlearn import Sampler as GLS
from eden.graph import Vectorizer
vectorizer=Vectorizer()
make_decomposer = lambda x,y: rna.rnadecomposer.RnaDecomposer(x,y,
include_base=False,
base_thickness_list=[2])
pp=rna.forgitransform.GraphTransformerForgi()
pp.fit(get_sequences(),vectorizer)
graphmanagers=[make_decomposer(vectorizer,x) for x in pp.transform(get_sequences(withoutnames=True)[:20])]
print 'DEMONSTRATING GRAPH MANAGER'
for i in [4]:
print 'grammar example %d' % i
gm=graphmanagers[i]
g=gm.pre_vectorizer_graph(nested=True)
draw.graphlearn([gm.pre_vectorizer_graph(nested=True),gm.abstract_graph(),gm.base_graph()], size = 15,vertex_label = 'label',contract=False)
draw.graphlearn([gm.base_graph()], size = 15,vertex_label = 'label',contract=True)
print gm.sequence
print gm.structure
'''
Out[3]:
In [7]:
%%time
from graphlearn.feasibility import FeasibilityChecker as Checker
import graphlearn.estimate as estimate
#from graphlearn.minor.decompose import MinorDecomposer
# not really needed since after refolding we get an RNA
#feasibility=Checker()
#feasibility.checklist.append(rna.is_rna)
graphs = get_sequences(size=100)
estimator=estimate.OneClassEstimator( nu=.33, cv=2, n_jobs=-1)
from graphlearn.minor.rna.rnadecomposer import RnaDecomposer
import graphlearn.minor.rna.infernal as infernal
from graphlearn.minor.rna import forgitransform as forgitransform
from graphlearn.minor.rna import rnatransform as rnatrans
sampler=infernal.AbstractSampler(
#radius_list=[0,1],
#thickness_list=[2],
#min_cip_count=1,
#min_interface_count=2,
################################# we need a learned transformer...
graphtransformer=rnatrans.learnedRnaTransformer(),#forgitransform.GraphTransformerForgi(fold_only=True),
decomposer=rnatrans.learnedRnaDedomposer(),#RnaDecomposer(output_sequence=True,pre_vectorizer_rm_f=True),
#estimator=estimator
#feasibility_checker=feasibility
include_seed=False
)
sampler.fit(graphs)
graphs = get_sequences(size=5,withoutnames=True)
r= list( sampler.transform(graphs))
In [ ]:
#draw production rules
from graphlearn.utils import draw
draw.draw_grammar(sampler.lsgg.productions,n_productions=5,n_graphs_per_production=5,
n_graphs_per_line=6, size=10, contract=False,
colormap='Paired', invert_colormap=False,
vertex_alpha=0.6, edge_alpha=0.5, abstract_interface=True)
In [ ]:
%%time
import graphlearn.utils.draw as draw
import itertools
# parameters and data
sequences = get_sequences(withoutnames=True)
id_start=66
id_end=id_start+3
sequences = sequences[id_start:id_end]
n_steps=50
#from eden_extra.modifier.graph.vertex_attributes import colorize
scores=[]
sequences=[]
ids=range(id_start,id_end)
for i,graphlist in enumerate(r):
#print 'Graph id: %d'%(ids[i])
scores.append(sampler.monitors[i].sampling_info['score_history'])
sequences.append(sampler.monitors[i].sampling_info['notes'])
#path_graphs = colorize(graphlist,
# output_attribute = 'color_level',
# labels = ['A','U','G','C'])
print 'going to draw'
print graphlist
if False:
draw.graphlearn(graphlist,
n_graphs_per_line=3, size=20, contract=True,
colormap='Paired', invert_colormap=False, vertex_color='color_level',
vertex_alpha=0.5, edge_alpha=0.7, edge_label='label', layout="RNA")
In [ ]:
colors=['b','g','r','c','m','y','k','w']
%matplotlib inline
from itertools import islice
import matplotlib.pyplot as plt
import numpy as np
step=1
num_graphs_per_plot=2
num_plots=np.ceil([len(scores)/num_graphs_per_plot])
for i in range(num_plots):
plt.figure(figsize=(10,5))
for j,score in enumerate(scores[i*num_graphs_per_plot:i*num_graphs_per_plot+num_graphs_per_plot]):
data = list(islice(score,None, None, step))
plt.plot(data,ls='-',color=colors[j], label='graph %d'%(j+i*num_graphs_per_plot+id_start))
# okok now we need to add the infernal evaluation
seqs=sequences[i*num_graphs_per_plot+j]
seqs=seqs.split('n')
# SEQUENCES STILL CONTAIN F, ALSO THERE ARE ERRORMESSAGES AT THE END OF INFO
#seqs=seqs[:-1]
#print seqs
data2= infernal.infernal_checker(seqs,
cmfile='../../toolsdata/rf00005.cm',
cmsearchbinarypath='../../toolsdata/cmsearch')
#print data2,seqs
plt.plot(data2,ls='--',color=colors[j], label='infernal %d'%(j+i*num_graphs_per_plot+id_start))
plt.plot([0.29]*len(data2),ls='-.',color='r', label='significance') #| '-' | '--' | '-.' | ':' | 'None' | ' ' |
plt.legend(loc='lower left',framealpha=0.5)
plt.grid()
plt.ylim(-0.1,1.1)
plt.show()
In [ ]:
print sequences
In [ ]:
import sys
sys.path
In [ ]:
In [ ]:
In [ ]:
In [ ]:
print r
In [ ]: