In [1]:
import nglview
import rna_pdb_tools.Seq as Seq
import rna_pdb_tools.BlastPDB
from rna_pdb_tools.BlastPDB import BlastPDB
reload(rna_pdb_tools.BlastPDB);
reload(Seq);

In [2]:
seq = Seq.Seq("GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG")

In [3]:
seq


Out[3]:
GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG

Secondary structure prediction


In [12]:
print seq.predict_ss()


(((((((((.(((((...)))))(((((((.....)))))))...)))))..)))).(((((....)))))

In [13]:
print seq.predict_ss(method='RNAsubopt')


GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG  -3310    100
(((((((((.((((.....))))(((((((.....)))))))...)))))..)))).(((((....))))) -32.40
(((((((((.(((((...)))))(((((((.....)))))))...)))))..)))).(((((....))))) -33.10
(((((((((((((....)))).((((((((.....))))))))..)))))..)))).(((((....))))) -32.30

In [14]:
print seq.predict_ss(method='ipknot')


(((((((((((((....)))).((((((((.....))))))))..)))))..)))).(((((....)))))

In [15]:
print seq.predict_ss(method='centroid_fold')


(((((((((.(((....)))..((((((((.....))))))))..)))))..)))).(((((....))))) (g=1,th=0.5,e=-27.26)

In [16]:
print seq.predict_ss(method='contextfold')


(((.(((((((((....)))).(((((((.......)))))))..)))))...))).(((((....)))))


In [20]:
import rna_pdb_tools.RfamSearch as rf
#reload(rf)

#seq = Seq.Seq("GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG")
rs = rf.RfamSearch()
print rs.cmscan(seq)


GGGUCAGGCCGGCGAAAGUCGCCACAGUUUGGGGAAAGCUGUGCAGCCUGUAACCCCCCCACGAAAGUGGG
# cmscan :: search sequence(s) against a CM database
# INFERNAL 1.1.2 (July 2016)
# Copyright (C) 2016 Howard Hughes Medical Institute.
# Freely distributed under a BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file:                   /tmp/ss.fa
# target CM database:                    Rfam.cm
# sequence reporting threshold:          E-value <= 1
# number of worker threads:              4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       test  [L=71]
Hit scores:
 rank     E-value  score  bias  modelname        start    end   mdl trunc   gc  description
 ----   --------- ------ -----  --------------- ------ ------   --- ----- ----  -----------
  (1) !   0.00046   24.2   0.0  JEV_hairpin          3     54 +  cm    no 0.60  -
 ------ inclusion threshold ------
  (2) ?      0.22   17.7   0.0  Flavivirus_SLIV      2     71 +  cm    no 0.63  -
  (3) ?      0.57   12.9   0.0  YFV_3UTR             5     50 +  cm    no 0.63  -
  (4) ?       0.8   10.4   0.0  Chlorobi-1          51     71 +  cm    3' 0.62  -


Hit alignments:
>> JEV_hairpin  
 rank     E-value  score  bias mdl mdl from   mdl to       seq from      seq to       acc trunc   gc
 ----   --------- ------ ----- --- -------- --------    ----------- -----------      ---- ----- ----
  (1) !   0.00046   24.2   0.0  cm        6       59 .]           3          54 + .. 0.90    no 0.60

                         v         v           v         v           NC
                 <-<<<<<-<<<<----<<<<<--~~~~~>>>>>---->>>>>>>>>>:::  CS
  JEV_hairpin  6 GUCAGGCCAGCAAAaGCuGCcAC*[9]*gGUaGACGGUGCUGCCUGCGuC  60
                 :UCAGGCC GC:AAAG:: CCAC     GG :: C G:GC GCCUG: +C 
         test  3 GUCAGGCCGGCGAAAGUCGCCAC*[7]*GGAAAGCUGUGCAGCCUGUAAC  55
                 *********************98..5..88888899**************  PP

>> Flavivirus_SLIV  
 rank     E-value  score  bias mdl mdl from   mdl to       seq from      seq to       acc trunc   gc
 ----   --------- ------ ----- --- -------- --------    ----------- -----------      ---- ----- ----
  (2) ?      0.22   17.7   0.0  cm        5       74 ..           2          71 + .] 0.86    no 0.63

                                 v    v                                                     NC
                     ((((((((<<<<<____>>>>>,<<<<__~~~~~~>>>>,,)))))--)))----<<<<<_____>>>>> CS
  Flavivirus_SLIV  5 uGuCAGgCCACgcuuuagcGUGCCACuCu*[ 6]*aGUGCaGcCUGcGaCaguGCCCCAGGaGGACUGGG 74
                     :GUCAGGCC: : ++   : :GCCAC: U      :GUGCAGCCUG+ AC:   CCCCA GA  A UGGG
             test  2 GGUCAGGCCGGCGAA--AGUCGCCACAGU*[11]*UGUGCAGCCUGUAACCC--CCCCACGAA-AGUGGG 71
                     **********98766..778******854...6..8***************7..8*****984.7***** PP

>> YFV_3UTR  
 rank     E-value  score  bias mdl mdl from   mdl to       seq from      seq to       acc trunc   gc
 ----   --------- ------ ----- --- -------- --------    ----------- -----------      ---- ----- ----
  (3) ?      0.57   12.9   0.0  cm        9       65 ..           5          50 + .. 0.85    no 0.63

                       vv         vv                                    NC
              (((((<<<<<<_________>>>>>>,<<<<<<<_________>>>>>>>,,))))) CS
  YFV_3UTR  9 CaGCCCagaaCuCCACACGAGuuuuGCCaCuGCuAAGCUGUGAgGCaGuGCAGGCuG 65
              CAG:CC:G:         GA++:U:GCCAC:G :  G  G  A: C:GUGCAG:CUG
      test  5 CAGGCCGGC---------GAAAGUCGCCACAGUUUGG--GGAAAGCUGUGCAGCCUG 50
              *******96.........45558*********99655..445699************ PP

>> Chlorobi-1  
 rank     E-value  score  bias mdl mdl from   mdl to       seq from      seq to       acc trunc   gc
 ----   --------- ------ ----- --- -------- --------    ----------- -----------      ---- ----- ----
  (4) ?       0.8   10.4   0.0  cm        1       21 [~          51          71 + .~ 1.00    3' 0.62

                    ???                     NC
                ::::<<<<<<<<____>>>>>~~~~~~ CS
  Chlorobi-1  1 gAAAuaCCuCACGCCaGUGaG*[46]> 67
                 AA   CC:CACG  AGUG:G      
        test 51 UAACCCCCCCACGAAAGUGGG*[ 0]> 71
                *********************...... PP



Internal HMM-only pipeline statistics summary: (run for model(s) with zero basepairs)
--------------------------------------------------------------------------------------
Query sequence(s):                                               1  (142 residues searched)
Target model(s):                                               347  (40911 consensus positions)
Windows   passing  local HMM SSV           filter:               4  (0.005764); expected (0.02)
Windows   passing  local HMM MSV      bias filter:               4  (0.005764); expected (0.02)
Windows   passing  local HMM Viterbi       filter:               0  (0); expected (0.001)
Windows   passing  local HMM Forward       filter:               0  (0); expected (1e-05)
Total HMM hits reported:                                         0  (0)

Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s):                                               1  (142 residues searched)
Query sequences re-searched for truncated hits:                  1  (410.5 residues re-searched, avg per model)
Target model(s):                                              2126  (267652 consensus positions)
Windows   passing  local HMM SSV           filter:            1094  (0.05973); expected (0.35)
Windows   passing  local HMM Viterbi       filter:                  (off)
Windows   passing  local HMM Viterbi  bias filter:                  (off)
Windows   passing  local HMM Forward       filter:             402  (0.0226); expected (0.02)
Windows   passing  local HMM Forward  bias filter:             376  (0.02123); expected (0.02)
Windows   passing glocal HMM Forward       filter:              86  (0.004806); expected (0.02)
Windows   passing glocal HMM Forward  bias filter:              84  (0.004726); expected (0.02)
Envelopes passing glocal HMM envelope defn filter:              81  (0.003846); expected (0.02)
Envelopes passing  local CM  CYK           filter:              13  (0.0006036); expected (0.0001)
Total CM hits reported:                                          4  (0.0002231); includes 2 truncated hit(s)

Total CM and HMM hits reported:                                  4

# CPU time: 5.04u 0.45s 00:00:05.49 Elapsed: 00:00:03.59
//
[ok]

PDB Blast search


In [19]:
p = BlastPDB(seq.seq)
p.search()
print p.result


<HTML>
<TITLE>BLAST Search Results</TITLE>
<BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">
<PRE>
<b>BLASTN 2.2.18 [Mar-02-2008]</b>


<b><a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids
=9254694&dopt=Citation">Reference</a>:</b>
Altschul, Stephen F., Thomas L. Madden, Alejandro A. Sch&auml;ffer, 
Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
"Gapped BLAST and PSI-BLAST: a new generation of protein database search
programs",  Nucleic Acids Res. 25:3389-3402.

<b>Query=</b> UNKNOWN_SEQUENCE
         (71 letters)

<b>Database:</b> pdb_nucleotide 
           15,787 sequences; 2,694,141 total letters

Searching..................................................done

<PRE>


                                                                 Score    E
Sequences producing significant alignments:                      (bits) Value

4PQV:1:A|pdbid|entity|chain(s)|sequence                               <a href = #11432> 34</a>   0.007
</PRE>
<PRE>
><a name = 11432></a>4PQV:1:A|pdbid|entity|chain(s)|sequence
          Length = 68

 Score = 34.2 bits (17), Expect = 0.007
 Identities = 23/25 (92%)
 Strand = Plus / Plus

                                   
Query: 1  gggtcaggccggcgaaagtcgccac 25
          |||||||  ||||||||||||||||
Sbjct: 1  gggtcagatcggcgaaagtcgccac 25
</PRE>


<PRE>
  Database: pdb_nucleotide
    Posted date:  Oct 30, 2016  3:26 AM
  Number of letters in database: 2,694,141
  Number of sequences in database:  15,787
  
Lambda     K      H
    1.37    0.711     1.31 

Gapped
Lambda     K      H
    1.37    0.711     1.31 


Matrix: blastn matrix:1 -3
Gap Penalties: Existence: 5, Extension: 2
Number of Sequences: 15787
Number of Hits to DB: 1369
Number of extensions: 6
Number of successful extensions: 6
Number of sequences better than 10.0: 1
Number of HSP's gapped: 6
Number of HSP's successfully gapped: 1
Length of query: 71
Length of database: 2,694,141
Length adjustment: 14
Effective length of query: 57
Effective length of database: 2,473,123
Effective search space: 140968011
Effective search space used: 140968011
X1: 10 (19.8 bits)
X2: 15 (29.7 bits)
X3: 50 (99.1 bits)
S1: 10 (20.3 bits)
S2: 12 (24.3 bits)
</PRE>
</BODY>
</HTML>

3D structure analysis


In [ ]:
from rna_pdb_tools.pdb_parser_lib import RNAStructure

fn = "rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb"

s = RNAStructure(fn)
print s.get_report()
print s.get_info_chains()
print s.get_head()
#print s.view() # image paste here :-)

In [ ]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb

RNA 3D structure prediction


In [ ]:
# model using SimRNA
#res = SimRNA(ss,seq.get_ss())

In [4]:
# fake import, should be 
res = "rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb"
# view
view = nglview.show_structure_file(res)
view.add_representation(repr_type='cartoon')
view

rna_pdb_tools --get_seq


In [ ]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq input/5k7c.pdb

In [ ]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq input/5k7c.pdb
./rna-pdb-tools.py --no_hr --get_seq input/tetraloop.pdb
./rna-pdb-tools.py --get_seq input/1xjr.pdb

In [ ]: