In [20]:
import nglview
import rna_pdb_tools.Seq as Seq
import rna_pdb_tools.BlastPDB
from rna_pdb_tools.BlastPDB import BlastPDB
reload(rna_pdb_tools.BlastPDB);
reload(Seq);

In [21]:
seq = Seq.RNASequence("CCGGACGAGGGGCGCCGUACCCGGUCAGCGACAAGACGGCGC")

In [22]:
seq


Out[22]:
CCGGACGAGGGGCGCCGUACCCGGUCAGCGACAAGACGGCGC

Secondary structure prediction


In [31]:
print seq.predict_ss()


None

In [32]:
print seq.predict_ss(method='RNAsubopt')


None

In [33]:
print seq.predict_ss(method='ipknot')


---------------------------------------------------------------------------
CalledProcessError                        Traceback (most recent call last)
<ipython-input-33-076de9a8db2f> in <module>()
----> 1 print seq.predict_ss(method='ipknot')

/Users/magnus/work/src/rna-pdb-tools/rna_pdb_tools/Seq.pyc in predict_ss(self, method, constraints, verbose)
    118 
    119         if method == "ipknot":
--> 120             self.ss_log = subprocess.check_output('ipknot ' + tf.name, shell=True)
    121             return '\n'.join(self.ss_log.split('\n')[2:])
    122 

/home/magnus/work/opt/miniconda2/lib/python2.7/subprocess.pyc in check_output(*popenargs, **kwargs)
    217         if cmd is None:
    218             cmd = popenargs[0]
--> 219         raise CalledProcessError(retcode, cmd, output=output)
    220     return output
    221 

CalledProcessError: Command 'ipknot /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpbv_wLi.fa' returned non-zero exit status -4

In [34]:
print seq.predict_ss(method='centroid_fold')


---------------------------------------------------------------------------
CalledProcessError                        Traceback (most recent call last)
<ipython-input-34-a06a125e2463> in <module>()
----> 1 print seq.predict_ss(method='centroid_fold')

/Users/magnus/work/src/rna-pdb-tools/rna_pdb_tools/Seq.pyc in predict_ss(self, method, constraints, verbose)
    127 
    128         if method == "centroid_fold":
--> 129             self.ss_log = subprocess.check_output('centroid_fold ' + tf.name, shell=True)
    130             return '\n'.join(self.ss_log.split('\n')[2:])
    131 

/home/magnus/work/opt/miniconda2/lib/python2.7/subprocess.pyc in check_output(*popenargs, **kwargs)
    217         if cmd is None:
    218             cmd = popenargs[0]
--> 219         raise CalledProcessError(retcode, cmd, output=output)
    220     return output
    221 

CalledProcessError: Command 'centroid_fold /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpTVQqsa.fa' returned non-zero exit status -4

In [35]:
print seq.predict_ss(method='contextfold')


((......)).(((((((.................)))))))



In [28]:
# load an alignment
%load_ext autoreload
%autoreload 2
import rna_pdb_tools.utils.rna_alignment.rna_alignment as ra
a = ra.RNAalignment('/Users/magnus/Dropbox/RNA_Puzzles21/Alignment/RF01763.stockholm.txt')
a


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Out[28]:
SingleLetterAlphabet() alignment with 39 rows and 89 columns
GACGAGCCCG----GUGCG--------CGGGC--UCCCGGACGA...GA- AP006618.1/133448-133513
AAAGGGC-------CAAAG-----------GC--GCCCGGACGA...AAG CP000910.1/2096309-2096249
ACAGGCGUCG----CGUGUUC------CGGCG--CCCCGGACGA...GAU CP000511.1/851388-851320
GAUGGGCGC-----CCAUCC--------GCGC--UCCCGGACGA...AG- AM849034.1/2863085-2863022
CGCGCCCCCGCU--UUGCUAC----GGUGGGG-ACACCCGACGA...ACC ABJH01000172.1/10128-10201
CGUGAGG-------GUGU------------CU-GGCCCGGACGA...GAC CP000113.1/4374426-4374485
GUCGGGCGC-----CCUCCC--------GCGC--UCCCGGACGA...AG- AM711867.1/3063871-3063808
GCGGCGCUC-----CUUCAGACA-----GAGC-GUCCCGGACGA...AGC AM746676.1/8188359-8188291
AAAUCGUCG-----ACA-----------CGAC--AUCCGGACGA...CC- AP011115.1/5208127-5208067
GCCAUGUCA-----CCAGU---------UGAC---ACCGGACGA...UUU CP000580.1/1319367-1319304
GAUAUGCUAU----AAA----------AUAAU--CUCCGGACGA...CUU CR628336.1/661276-661213
GCAGGCGCCG----UCGAUC-------CGGCG--CCCCGGACGA...GUU CP000656.1/85042-85109
AGGCUUUCC-----CUUCC---------GGAG-CCUCCGGACGA...CA- CP000454.1/4240999-4241061
CGGU-CGGCC----UUAUA--------GGCUGCGCCCCGGACGA...GGA CP000359.1/2285879-2285947
UCAGUUUC------UGCGACU--------GAGGUGCCCGGACGA...UUU BX842652.1/324410-324345
GUAUGCCCUUUUGCUACG-----GUGGAGGGGAACCCCCGACGA...ACC ABJF01000294.1/7123-7044
CCAA-GGCUC----CGCACCC------GAGCC---UCCGGACGA...A-- CP001341.1/3865849-3865912
UCAGAGCCUG----GGAC---------CAGGC--UUCCGGACGA...GC- AAMN01000002.1/564690-564754
...
UUCGC-CGCG----GCUUG--------CGCGA-GCCCCGGACGA...AG- ACVP01000007.1/106973-107037

In [29]:
#print a.io.format("fasta")
ra.RChie().plot_cov(a.io.format("fasta"), a.ss_cons_std, verbose=False)


Rchie: plot saved to rchie.png
Out[29]:

In [12]:
import rna_pdb_tools.RfamSearch as rf
seq = Seq.RNASequence("CCGGACGAGGGGCGCCGUACCCGGUCAGCGACAAGACGGCGC")
rs = rf.RfamSearch()
print rs.cmscan(seq)


# cmscan :: search sequence(s) against a CM database
# INFERNAL 1.1.2 (July 2016)
# Copyright (C) 2016 Howard Hughes Medical Institute.
# Freely distributed under a BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file:                   /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpUbTC2W.fa
# target CM database:                    /home/magnus/work/db/rfamdb/Rfam.cm
# sequence reporting threshold:          E-value <= 1
# number of worker threads:              4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       target  [L=42]
Hit scores:
 rank     E-value  score  bias  modelname  start    end   mdl trunc   gc  description
 ----   --------- ------ -----  --------- ------ ------   --- ----- ----  -----------
  (1) !   1.6e-11   55.5   0.1  ykkC-III       1     42 +  cm    no 0.76  -


Hit alignments:
>> ykkC-III  
 rank     E-value  score  bias mdl mdl from   mdl to       seq from      seq to       acc trunc   gc
 ----   --------- ------ ----- --- -------- --------    ----------- -----------      ---- ----- ----
  (1) !   1.6e-11   55.5   0.1  cm       23       64 ..           1          42 + [] 1.00    no 0.76

                                                         NC
              -----------<<<<<<<_________________>>>>>>> CS
  ykkC-III 23 CCGGACGAGggGcGcCGUACCCGGucaGcGACAAGACGgCgC 64
              CCGGACGAGGGGCGCCGUACCCGGUCAGCGACAAGACGGCGC
    target  1 CCGGACGAGGGGCGCCGUACCCGGUCAGCGACAAGACGGCGC 42
              ****************************************** PP



Internal HMM-only pipeline statistics summary: (run for model(s) with zero basepairs)
--------------------------------------------------------------------------------------
Query sequence(s):                                               1  (84 residues searched)
Target model(s):                                               347  (40911 consensus positions)
Windows   passing  local HMM SSV           filter:               2  (0.002642); expected (0.02)
Windows   passing  local HMM MSV      bias filter:               2  (0.002642); expected (0.02)
Windows   passing  local HMM Viterbi       filter:               0  (0); expected (0.001)
Windows   passing  local HMM Forward       filter:               0  (0); expected (1e-05)
Total HMM hits reported:                                         0  (0)

Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s):                                               1  (84 residues searched)
Query sequences re-searched for truncated hits:                  1  (250.9 residues re-searched, avg per model)
Target model(s):                                              2126  (267652 consensus positions)
Windows   passing  local HMM SSV           filter:             483  (0.02761); expected (0.35)
Windows   passing  local HMM Viterbi       filter:                  (off)
Windows   passing  local HMM Viterbi  bias filter:                  (off)
Windows   passing  local HMM Forward       filter:             168  (0.009685); expected (0.02)
Windows   passing  local HMM Forward  bias filter:             135  (0.007799); expected (0.02)
Windows   passing glocal HMM Forward       filter:              33  (0.001931); expected (0.02)
Windows   passing glocal HMM Forward  bias filter:              30  (0.001754); expected (0.02)
Envelopes passing glocal HMM envelope defn filter:              29  (0.001632); expected (0.02)
Envelopes passing  local CM  CYK           filter:               5  (0.0002641); expected (0.0001)
Total CM hits reported:                                          1  (5.9e-05); includes 0 truncated hit(s)

Total CM and HMM hits reported:                                  1

# CPU time: 0.45u 0.18s 00:00:00.63 Elapsed: 00:00:00.98
//
[ok]

PDB Blast search


In [13]:
p = BlastPDB(seq.seq)
p.search()
print p.result


<HTML>
<TITLE>BLAST Search Results</TITLE>
<BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">
<PRE>
<b>BLASTN 2.2.18 [Mar-02-2008]</b>


<b><a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids
=9254694&dopt=Citation">Reference</a>:</b>
Altschul, Stephen F., Thomas L. Madden, Alejandro A. Sch&auml;ffer, 
Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
"Gapped BLAST and PSI-BLAST: a new generation of protein database search
programs",  Nucleic Acids Res. 25:3389-3402.

<b>Query=</b> UNKNOWN_SEQUENCE
         (42 letters)

<b>Database:</b> pdb_nucleotide 
           15,339 sequences; 2,579,352 total letters

Searching..................................................done

<PRE>


                                                                 Score    E
Sequences producing significant alignments:                      (bits) Value

1M5L:1:A|pdbid|entity|chain(s)|sequence                               <a href = #1717> 26</a>   0.86 
5K0Y:2:A|pdbid|entity|chain(s)|sequence                               <a href = #15047> 24</a>   3.4  
</PRE>
<PRE>
><a name = 1717></a>1M5L:1:A|pdbid|entity|chain(s)|sequence
          Length = 38

 Score = 26.3 bits (13), Expect = 0.86
 Identities = 13/13 (100%)
 Strand = Plus / Plus

                       
Query: 3  ggacgaggggcgc 15
          |||||||||||||
Sbjct: 26 ggacgaggggcgc 38
</PRE>


<PRE>
><a name = 15047></a>5K0Y:2:A|pdbid|entity|chain(s)|sequence
          Length = 1776

 Score = 24.3 bits (12), Expect = 3.4
 Identities = 12/12 (100%)
 Strand = Plus / Plus

                       
Query: 19  acccggtcagcg 30
           ||||||||||||
Sbjct: 233 acccggtcagcg 244
</PRE>


<PRE>
  Database: pdb_nucleotide
    Posted date:  Aug 7, 2016 11:48 AM
  Number of letters in database: 2,579,352
  Number of sequences in database:  15,339
  
Lambda     K      H
    1.37    0.711     1.31 

Gapped
Lambda     K      H
    1.37    0.711     1.31 


Matrix: blastn matrix:1 -3
Gap Penalties: Existence: 5, Extension: 2
Number of Sequences: 15339
Number of Hits to DB: 523
Number of extensions: 20
Number of successful extensions: 20
Number of sequences better than 10.0: 2
Number of HSP's gapped: 20
Number of HSP's successfully gapped: 2
Length of query: 42
Length of database: 2,579,352
Length adjustment: 13
Effective length of query: 29
Effective length of database: 2,379,945
Effective search space: 69018405
Effective search space used: 69018405
X1: 9 (17.8 bits)
X2: 15 (29.7 bits)
X3: 50 (99.1 bits)
S1: 9 (18.3 bits)
S2: 12 (24.3 bits)
</PRE>
</BODY>
</HTML>

3D structure analysis


In [12]:
from rna_pdb_tools.pdb_parser_lib import RNAStructure

fn = "rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb"

s = RNAStructure(fn)
print s.get_report()
print s.get_info_chains()
print s.get_head()
#print s.view() # image paste here :-)


The RNARNAStructure report: rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb 
A:1-43 B:1-10
ATOM      1  P     G A   1     -12.509  18.639  13.726  1.00  0.00
ATOM      2  OP1   G A   1     -13.934  18.507  14.168  1.00  0.00
ATOM      3  OP2   G A   1     -11.541  17.557  14.097  1.00  0.00
ATOM      4  O5'   G A   1     -12.604  18.683  12.146  1.00  0.00
ATOM      5  C5'   G A   1     -13.512  19.569  11.525  1.00  0.00

In [13]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb


bash: line 2: ./rna-pdb-tools.py: No such file or directory

RNA 3D structure prediction


In [14]:
# model using SimRNA
#res = SimRNA(ss,seq.get_ss())

In [15]:
# fake import, should be 
res = "rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb"
# view
view = nglview.show_structure_file(res)
view.add_representation(repr_type='cartoon')
view

rna_pdb_tools --get_seq


In [16]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq input/5k7c.pdb


bash: line 2: ./rna-pdb-tools.py: No such file or directory

In [17]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq input/5k7c.pdb
./rna-pdb-tools.py --no_hr --get_seq input/tetraloop.pdb
./rna-pdb-tools.py --get_seq input/1xjr.pdb


bash: line 2: ./rna-pdb-tools.py: No such file or directory
bash: line 3: ./rna-pdb-tools.py: No such file or directory
bash: line 4: ./rna-pdb-tools.py: No such file or directory

In [ ]: