RNA-Puzzle 18

Init the library and needed functions.


In [6]:
import rna_tools.Seq as Seq
import rna_tools.BlastPDB
from rna_tools.BlastPDB import BlastPDB
reload(rna_tools.BlastPDB);
reload(Seq);

Create a RNASeqence object.


In [7]:
seq = Seq.RNASequence("GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG")
print(seq)


rna_seq
GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG

Secondary structure prediction


In [8]:
print(seq.predict_ss())


>rna_seq
GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG
(((((((((.(((((...)))))(((((((.....)))))))...)))))..)))).(((((....))))) (-33.10)

In [9]:
print(seq.predict_ss(method='RNAsubopt'))


>rna_seq [100]
GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG -33.10   1.00
(((((((((.((((.....))))(((((((.....)))))))...)))))..)))).(((((....))))) -32.40
(((((((((.(((((...)))))(((((((.....)))))))...)))))..)))).(((((....))))) -33.10
(((((((((((((....)))).((((((((.....))))))))..)))))..)))).(((((....))))) -32.30


In [10]:
print(seq.predict_ss(method='ipknot'))


GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG
(((((((((((((....)))).((((((((.....))))))))..)))))..)))).(((((....)))))


In [11]:
print(seq.predict_ss(method='centroid_fold'))


---------------------------------------------------------------------------
CalledProcessError                        Traceback (most recent call last)
<ipython-input-11-fa5ebd28a883> in <module>()
----> 1 print(seq.predict_ss(method='centroid_fold'))

/Users/magnus/work-src/rna-tools/rna_tools/Seq.py in predict_ss(self, method, constraints, enforce_constraint, shapefn, explore, verbose)
    547 
    548         elif method == "centroid_fold":
--> 549             self.ss_log = subprocess.check_output('centroid_fold ' + tf.name, shell=True)
    550             return '\n'.join(self.ss_log.split('\n')[2:])
    551 

/Users/magnus/miniconda2/lib/python2.7/subprocess.pyc in check_output(*popenargs, **kwargs)
    221         if cmd is None:
    222             cmd = popenargs[0]
--> 223         raise CalledProcessError(retcode, cmd, output=output)
    224     return output
    225 

CalledProcessError: Command 'centroid_fold /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmp4ZRI3P.fa' returned non-zero exit status 127

PDB Blast search


In [29]:
p = BlastPDB(seq.seq)
p.search()
print p.result


<HTML>
<TITLE>BLAST Search Results</TITLE>
<BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">
<PRE>
<b>BLASTN 2.2.18 [Mar-02-2008]</b>


<b><a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids
=9254694&dopt=Citation">Reference</a>:</b>
Altschul, Stephen F., Thomas L. Madden, Alejandro A. Sch&auml;ffer, 
Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
"Gapped BLAST and PSI-BLAST: a new generation of protein database search
programs",  Nucleic Acids Res. 25:3389-3402.

<b>Query=</b> UNKNOWN_SEQUENCE
         (71 letters)

<b>Database:</b> pdb_nucleotide 
           19,357 sequences; 3,678,086 total letters

Searching..................................................done

<PRE>


                                                                 Score    E
Sequences producing significant alignments:                      (bits) Value

5TPY:1:A|pdbid|entity|chain(s)|sequence                               <a href = #16642>105</a>   3e-24
4PQV:1:A|pdbid|entity|chain(s)|sequence                               <a href = #11431> 34</a>   0.010
</PRE>
<PRE>
><a name = 16642></a>5TPY:1:A|pdbid|entity|chain(s)|sequence
          Length = 71

 Score =  105 bits (53), Expect = 3e-24
 Identities = 53/53 (100%)
 Strand = Plus / Plus

                                                               
Query: 1  gggtcaggccggcgaaagtcgccacagtttggggaaagctgtgcagcctgtaa 53
          |||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct: 1  gggtcaggccggcgaaagtcgccacagtttggggaaagctgtgcagcctgtaa 53
</PRE>


<PRE>
><a name = 11431></a>4PQV:1:A|pdbid|entity|chain(s)|sequence
          Length = 68

 Score = 34.2 bits (17), Expect = 0.010
 Identities = 23/25 (92%)
 Strand = Plus / Plus

                                   
Query: 1  gggtcaggccggcgaaagtcgccac 25
          |||||||  ||||||||||||||||
Sbjct: 1  gggtcagatcggcgaaagtcgccac 25
</PRE>


<PRE>
  Database: pdb_nucleotide
    Posted date:  Sep 28, 2018 10:56 PM
  Number of letters in database: 3,678,086
  Number of sequences in database:  19,357
  
Lambda     K      H
    1.37    0.711     1.31 

Gapped
Lambda     K      H
    1.37    0.711     1.31 


Matrix: blastn matrix:1 -3
Gap Penalties: Existence: 5, Extension: 2
Number of Sequences: 19357
Number of Hits to DB: 1840
Number of extensions: 12
Number of successful extensions: 7
Number of sequences better than 10.0: 2
Number of HSP's gapped: 7
Number of HSP's successfully gapped: 2
Length of query: 71
Length of database: 3,678,086
Length adjustment: 14
Effective length of query: 57
Effective length of database: 3,407,088
Effective search space: 194204016
Effective search space used: 194204016
X1: 10 (19.8 bits)
X2: 15 (29.7 bits)
X3: 50 (99.1 bits)
S1: 10 (20.3 bits)
S2: 12 (24.3 bits)
</PRE>
</BODY>
</HTML>

In [30]:
import rna_pdb_tools.RfamSearch as rf
#reload(rf)

#seq = Seq.Seq("GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG")
rs = rf.RfamSearch()
print rs.cmscan(seq)


---------------------------------------------------------------------------
RfamSearchError                           Traceback (most recent call last)
<ipython-input-30-fc7568030162> in <module>()
      4 #seq = Seq.Seq("GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG")
      5 rs = rf.RfamSearch()
----> 6 print rs.cmscan(seq)

/Users/magnus/work-src/rna-pdb-tools/rna_pdb_tools/RfamSearch.pyc in cmscan(self, seq)
     62         err = o.stderr.read().strip()
     63         if err:
---> 64             raise RfamSearchError(err)
     65         self.output = open(of.name).read()
     66         # os.chdir(old_pwd)

RfamSearchError: Error: File existence/permissions problem in trying to open CM file /home/magnus/work/db/rfamdb/Rfam.cm.
CM file /home/magnus/work/db/rfamdb/Rfam.cm not found (nor an .i1m binary of it); also looked in RFAMDB

3D structure analysis


In [ ]:
from rna_pdb_tools.pdb_parser_lib import RNAStructure

fn = "rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb"

s = RNAStructure(fn)
print s.get_report()
print s.get_info_chains()
print s.get_head()
#print s.view() # image paste here :-)

In [ ]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb

RNA 3D structure prediction


In [ ]:
# model using SimRNA
#res = SimRNA(ss,seq.get_ss())

In [ ]:
# fake import, should be 
res = "rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb"
# view
view = nglview.show_structure_file(res)
view.add_representation(repr_type='cartoon')
view

rna_pdb_tools --get_seq


In [ ]:
%%bash
cd ~/rna-bench/opt/xxxcx rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq ~/rna-bench/examples/5k7c.pdb

In [ ]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq input/5k7c.pdb
./rna-pdb-tools.py --no_hr --get_seq input/tetraloop.pdb
./rna-pdb-tools.py --get_seq input/1xjr.pdb

In [ ]: