In [1]:
import logging

from eden.util import configure_logging

# Hand the root logger (logging.getLogger() with no name) to EDeN's logging
# helper; verbosity=2 is forwarded unchanged.
configure_logging(
    logging.getLogger(),
    verbosity=2)
In [2]:
# Rfam family analysed by this notebook. Only one id can be active at a time;
# the other families that were tried are kept here for reference:
#   'RF02275'  Hammerhead_HH9
#   'RF00871'  microRNA mir-689
rfam_id = 'RF00005'  # tRNA
def rfam_uri(family_id, local=False):
    """Return where the FASTA alignment of an Rfam family can be read from.

    Args:
        family_id: Rfam accession string, e.g. 'RF00005'.
        local: if True, return the relative file name '<family_id>.fa'
            instead of the rfam.xfam.org download URL. Defaults to False,
            which matches the definition that was previously in effect
            (the remote URL).

    Returns:
        str: the download URL, or the local file name when ``local`` is True.
    """
    if local:
        return '%s.fa' % (family_id,)
    return ('http://rfam.xfam.org/family/%s/alignment?acc=%s&format=fastau&download=0'
            % (family_id, family_id))
In [3]:
%%time
from eden.converter.fasta import fasta_to_sequence
from eden.converter.rna.rnafold import rnafold_to_eden
from eden.util import mp_pre_process

# Options forwarded to rnafold_to_eden through mp_pre_process. The same dict
# is read again when the negative graphs are built.
pre_processor_args = {
    'max_num': 3,
    'shape_type': 5,
    'energy_range': 20,
}

# Positive set: sequences read from the Rfam URI of the selected family.
pos_seqs = fasta_to_sequence(rfam_uri(rfam_id))

# Convert the sequences with rnafold_to_eden, split into 5 blocks over 4 jobs.
pos_graphs = mp_pre_process(
    pos_seqs,
    pre_processor=rnafold_to_eden,
    pre_processor_args=pre_processor_args,
    n_blocks=5,
    n_jobs=4)
In [4]:
%%time
from eden.converter.fasta import fasta_to_sequence
from eden.converter.rna.rnafold import rnafold_to_eden
from eden.modifier.seq import seq_to_seq, shuffle_modifier
from eden.util import mp_pre_process

# Negative set: the same family sequences passed through shuffle_modifier
# with times=2 (presumably two shuffled copies per input -- confirm against
# the eden.modifier.seq documentation).
neg_seqs = seq_to_seq(
    fasta_to_sequence(rfam_uri(rfam_id)),
    modifier=shuffle_modifier,
    times=2)

# Same conversion settings as the positive set; pre_processor_args must
# already be defined by the cell that builds pos_graphs.
neg_graphs = mp_pre_process(
    neg_seqs,
    pre_processor=rnafold_to_eden,
    pre_processor_args=pre_processor_args,
    n_blocks=5,
    n_jobs=4)
In [5]:
from eden.util import random_bipartition_iter

# Value passed as relative_size to random_bipartition_iter; presumably the
# share of items that goes to the first (train) output -- TODO confirm.
train_test_split = 0.7

# Positives and negatives are partitioned separately with the same ratio.
iterable_pos_train, iterable_pos_test = random_bipartition_iter(
    pos_graphs,
    relative_size=train_test_split)
iterable_neg_train, iterable_neg_test = random_bipartition_iter(
    neg_graphs,
    relative_size=train_test_split)
In [6]:
%%time
from eden.graph import Vectorizer
from eden.util import fit

# The vectorizer is reused later when the fitted estimator is evaluated.
vectorizer = Vectorizer(complexity=3)

# Fit on the training partitions; random_state=1 is passed to fit as-is.
estimator = fit(
    iterable_pos_train,
    iterable_neg_train,
    vectorizer,
    n_jobs=4,
    cv=10,
    n_iter_search=1,
    random_state=1)
In [7]:
%%time
from eden.util import estimate

# Evaluate the fitted estimator on the held-out partitions. The call is the
# last expression of the cell, so any value it returns is displayed.
estimate(
    iterable_pos_test,
    iterable_neg_test,
    estimator,
    vectorizer)
In [8]:
# Family whose alignment URI is shown below.
rfam_id = 'RF00005'  # tRNA

# Parenthesised single-argument form: prints the same text under the
# Python 2 print statement and is also valid as a Python 3 function call.
print(rfam_uri(rfam_id))