In [2]:
import nglview
import rna_tools.Seq as Seq

In [3]:
seq = Seq.RNASequence("GGCGCGGCACCGUCCGCGGAACAAACGG")

In [4]:
seq


Out[4]:
GGCGCGGCACCGUCCGCGGAACAAACGG

Secondary structure prediction


In [5]:
print(seq.predict_ss())


>rna_seq
GGCGCGGCACCGUCCGCGGAACAAACGG
..(((((......))))).......... ( -8.00)

In [6]:
print seq.predict_ss(method='RNAsubopt')


>rna_seq [100]
GGCGCGGCACCGUCCGCGGAACAAACGG  -8.00   1.00
..(((((......)))))..........  -8.00
..((((((...).)))))..........  -7.10


In [9]:
# print seq.predict_ss(method='ipknot')

In [10]:
# print seq.predict_ss(method='centroid_fold')

In [14]:
print(seq.predict_ss(method='contextfold'))


((((......))))..((.......)).



In [49]:
from rna_tools.rna_dot2ct import dot2ct
ss = seq.predict_ss(method='contextfold')
print(dot2ct(seq.seq, ss))


dot2ct /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpUtK9Bu /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpUtK9Bu.ct
Converting dot bracket file...Dot bracket file conversion complete. Created: <closed file '/var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpUtK9Bu', mode 'w' at 0x10adb3540>.ct
28  ss
    1 G       0    2   14    1
    2 G       1    3   13    2
    3 C       2    4   12    3
    4 G       3    5   11    4
    5 C       4    6    0    5
    6 G       5    7    0    6
    7 G       6    8    0    7
    8 C       7    9    0    8
    9 A       8   10    0    9
   10 C       9   11    0   10
   11 C      10   12    4   11
   12 G      11   13    3   12
   13 U      12   14    2   13
   14 C      13   15    1   14
   15 C      14   16    0   15
   16 G      15   17    0   16
   17 C      16   18   27   17
   18 G      17   19   26   18
   19 G      18   20    0   19
   20 A      19   21    0   20
   21 A      20   22    0   21
   22 C      21   23    0   22
   23 A      22   24    0   23
   24 A      23   25    0   24
   25 A      24   26    0   25
   26 C      25   27   18   26
   27 G      26   28   17   27
   28 G      27    0    0   28

RFAM search


In [17]:
import rna_pdb_tools.RfamSearch as rf
reload(rf)

seq = Seq.Seq("GGGUCGUGACUGGCGAACAGGUGGGAAACCACCGGGGAGCGACCCCGGCAUCGAUAGCCGCCCGCCUGGGC")
rs = rf.RfamSearch()
print rs.cmscan(seq)


GGGUCGUGACUGGCGAACAGGUGGGAAACCACCGGGGAGCGACCCCGGCAUCGAUAGCCGCCCGCCUGGGC
# cmscan :: search sequence(s) against a CM database
# INFERNAL 1.1.2 (July 2016)
# Copyright (C) 2016 Howard Hughes Medical Institute.
# Freely distributed under a BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file:                   /tmp/ss.fa
# target CM database:                    Rfam.cm
# sequence reporting threshold:          E-value <= 1
# number of worker threads:              4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       test  [L=71]
Hit scores:
 rank     E-value  score  bias  modelname  start    end   mdl trunc   gc  description
 ----   --------- ------ -----  --------- ------ ------   --- ----- ----  -----------
  (1) !   1.3e-06   44.6   0.1  pfl            1     71 +  cm 5'&3' 0.72  -


Hit alignments:
>> pfl  
 rank     E-value  score  bias mdl mdl from   mdl to       seq from      seq to       acc trunc   gc
 ----   --------- ------ ----- --- -------- --------    ----------- -----------      ---- ----- ----
  (1) !   1.3e-06   44.6   0.1  cm        6       84 ~~           1          71 + ~~ 0.97 5'&3' 0.72

                                                                        ???      ??                     NC
          ~~~~~(((((((,,,,,,,,,,,,,<<<___>>>,,<<<<_____>>>>.,,,,,))))))))))---...<<<<<_________>>>~~~~~ CS
   pfl  1 <[5]*cccgcgcgACUGGCGaaaacggcuuagccguGUGGAucuACCAC.GGGgAgcgcgggggguaa...cuGCCGauCGCCUGGGC*[7]> 91
               ::::CG:GACUGGCGAA A            GUGG ++ ACCAC GGGGA:CG:::: GG A+     GCCG +CGCCUGGGC     
  test  1 <[0]*GGGUCGUGACUGGCGAACAG-----------GUGGGAA-ACCACcGGGGAGCGACCCCGGCAUcgaUAGCCGCCCGCCUGGGC*[0]> 71
          .....*****************966...........******9.*********************888********************..... PP



Internal HMM-only pipeline statistics summary: (run for model(s) with zero basepairs)
--------------------------------------------------------------------------------------
Query sequence(s):                                               1  (142 residues searched)
Target model(s):                                               347  (40911 consensus positions)
Windows   passing  local HMM SSV           filter:               5  (0.007205); expected (0.02)
Windows   passing  local HMM MSV      bias filter:               3  (0.004323); expected (0.02)
Windows   passing  local HMM Viterbi       filter:               0  (0); expected (0.001)
Windows   passing  local HMM Forward       filter:               0  (0); expected (1e-05)
Total HMM hits reported:                                         0  (0)

Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s):                                               1  (142 residues searched)
Query sequences re-searched for truncated hits:                  1  (410.5 residues re-searched, avg per model)
Target model(s):                                              2126  (267652 consensus positions)
Windows   passing  local HMM SSV           filter:            1029  (0.05685); expected (0.35)
Windows   passing  local HMM Viterbi       filter:                  (off)
Windows   passing  local HMM Viterbi  bias filter:                  (off)
Windows   passing  local HMM Forward       filter:             414  (0.02386); expected (0.02)
Windows   passing  local HMM Forward  bias filter:             350  (0.02025); expected (0.02)
Windows   passing glocal HMM Forward       filter:             104  (0.005853); expected (0.02)
Windows   passing glocal HMM Forward  bias filter:              92  (0.005128); expected (0.02)
Envelopes passing glocal HMM envelope defn filter:              86  (0.004435); expected (0.02)
Envelopes passing  local CM  CYK           filter:              11  (0.0004274); expected (0.0001)
Total CM hits reported:                                          1  (0.0001209); includes 1 truncated hit(s)

Total CM and HMM hits reported:                                  1

# CPU time: 4.04u 0.58s 00:00:04.62 Elapsed: 00:00:03.25
//
[ok]

3D structure analysis


In [18]:
from rna_pdb_tools.pdb_parser_lib import RNAStructure

fn = "rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb"

s = RNAStructure(fn)
print s.get_report()
print s.get_info_chains()
print s.get_head()
#print s.view() # image paste here :-)


The RNARNAStructure report: rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb 
A:1-43 B:1-10
ATOM      1  P     G A   1     -12.509  18.639  13.726  1.00  0.00
ATOM      2  OP1   G A   1     -13.934  18.507  14.168  1.00  0.00
ATOM      3  OP2   G A   1     -11.541  17.557  14.097  1.00  0.00
ATOM      4  O5'   G A   1     -12.604  18.683  12.146  1.00  0.00
ATOM      5  C5'   G A   1     -13.512  19.569  11.525  1.00  0.00

In [19]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb


> 260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb A:1-43
GGCGGAACCGGUGAGUACACCGGAAUCCGAAAGGAUUUGGGCG
> 260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb B:1-10
UGCCCCCGCC

RNA 3D structure prediction


In [20]:
# model using SimRNA
#res = SimRNA(ss,seq.get_ss())

In [21]:
# fake import, should be 
res = "rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb"
# view
view = nglview.show_structure_file(res)
view.add_representation(repr_type='cartoon')
view

rna_pdb_tools --get_seq


In [22]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq input/5k7c.pdb


> 5k7c.pdb A:1-47
CGUGGUUAGGGCCACGUUAAAUAGUUGCUUAAGCCCUAAGCGUUGAU
> 5k7c.pdb B:48-58
AUCAGgUGCAA

In [23]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq input/5k7c.pdb
./rna-pdb-tools.py --no_hr --get_seq input/tetraloop.pdb
./rna-pdb-tools.py --get_seq input/1xjr.pdb


> 5k7c.pdb A:1-47
CGUGGUUAGGGCCACGUUAAAUAGUUGCUUAAGCCCUAAGCGUUGAU
> 5k7c.pdb B:48-58
AUCAGgUGCAA
> tetraloop.pdb A:4-7
GCAA
> 1xjr.pdb A:1-47
GGAGUUCACCGAGGCCACGCGGAGUACGAUCGAGGGUACAGUGAAUU

In [ ]: