RNA-Puzzle 20 - Twister sister ribozyme


In [37]:
import nglview
from rna_tools.Seq import RNASequence
from rna_tools.BlastPDB import BlastPDB
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [8]:
seq = RNASequence("ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU")

In [9]:
seq


Out[9]:
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU

Secondary structure prediction


In [10]:
print seq.predict_ss()


>rna_seq
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU
(((((..(((((.(((.((..((.(((((.......)))))...))..)).))).))).))..))))) (-32.50)

In [11]:
print seq.predict_ss(method='RNAsubopt')


>rna_seq [100]
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU -32.50   1.00
(((((..(((((.(((.((..((.(((((.......)))))...))..)).))).))).))..))))) -32.50
.((((..(((((.(((.((..((.(((((.......)))))...))..)).))).))).))..)))). -31.90


In [12]:
print seq.predict_ss(method='ipknot', verbose=True)


ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU
..((....))(((..((((((...(((((.......)))))(((((....)))))))))))..)))..


In [13]:
print seq.predict_ss(method='contextfold')


(((((..........((((((...(((((.......)))))(((((....)))))))))))..)))))


Search Rfam


In [58]:
import rna_tools.RfamSearch as RfamSearch
rs = RfamSearch.RfamSearch()
print rs.cmscan(seq)


('RFAM_DB_PATH', '/home/magnus/work/db/rfam/Rfam.cm')
cmscan -E 1 /home/magnus/work/db/rfam/Rfam.cm /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpCfvTrK.fa  > /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmphHDnj0
# cmscan :: search sequence(s) against a CM database
# INFERNAL 1.1.2 (July 2016)
# Copyright (C) 2016 Howard Hughes Medical Institute.
# Freely distributed under a BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file:                   /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpCfvTrK.fa
# target CM database:                    /home/magnus/work/db/rfam/Rfam.cm
# sequence reporting threshold:          E-value <= 1
# number of worker threads:              4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       target  [L=50]
Hit scores:
 rank     E-value  score  bias  modelname       start    end   mdl trunc   gc  description
 ----   --------- ------ -----  -------------- ------ ------   --- ----- ----  -----------
  (1) !   1.7e-08   42.7   0.0  Twister-sister      1     50 +  cm 5'&3' 0.72  Twister_sister_ribozyme


Hit alignments:
>> Twister-sister  Twister_sister_ribozyme
 rank     E-value  score  bias mdl mdl from   mdl to       seq from      seq to       acc trunc   gc
 ----   --------- ------ ----- --- -------- --------    ----------- -----------      ---- ----- ----
  (1) !   1.7e-08   42.7   0.0  cm       27       79 ~~           1          50 + ~~ 0.97 5'&3' 0.72

                           ???                                    ???      ????      NC
                    ~~~~~~_>>>,,<<<<_________>>>><<<<<<_____>>>>>>)))------))))~~~~~ CS
  Twister-sister  1 <[26]*ccCGCCGCcGGUGCAAGCCCgGCCgCCCcagacagGGGcGGGCGCUCAUGGGU*[5]> 84
                          +CCGCCGC:GGUGCAAG CC:GCC:C:C:    +:G:G:GGGCGCUCAUGGGU     
          target  1 <[ 0]*GCCGCCGCUGGUGCAAGUCCAGCCACGCUU---CGGCGUGGGCGCUCAUGGGU*[0]> 50
                    ......9***************************95...49******************..... PP



Internal HMM-only pipeline statistics summary: (run for model(s) with zero basepairs)
--------------------------------------------------------------------------------------
Query sequence(s):                                               1  (100 residues searched)
Target model(s):                                               347  (40911 consensus positions)
Windows   passing  local HMM SSV           filter:               2  (0.002882); expected (0.02)
Windows   passing  local HMM MSV      bias filter:               2  (0.002882); expected (0.02)
Windows   passing  local HMM Viterbi       filter:               0  (0); expected (0.001)
Windows   passing  local HMM Forward       filter:               0  (0); expected (1e-05)
Total HMM hits reported:                                         0  (0)

Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s):                                               1  (100 residues searched)
Query sequences re-searched for truncated hits:                  1  (297.2 residues re-searched, avg per model)
Target model(s):                                              2669  (345034 consensus positions)
Windows   passing  local HMM SSV           filter:             798  (0.03589); expected (0.35)
Windows   passing  local HMM Viterbi       filter:                  (off)
Windows   passing  local HMM Viterbi  bias filter:                  (off)
Windows   passing  local HMM Forward       filter:             290  (0.01307); expected (0.02)
Windows   passing  local HMM Forward  bias filter:             251  (0.01136); expected (0.02)
Windows   passing glocal HMM Forward       filter:              59  (0.002673); expected (0.02)
Windows   passing glocal HMM Forward  bias filter:              55  (0.002484); expected (0.02)
Envelopes passing glocal HMM envelope defn filter:              54  (0.002259); expected (0.02)
Envelopes passing  local CM  CYK           filter:              14  (0.0005188); expected (0.0001)
Total CM hits reported:                                          1  (9.432e-05); includes 1 truncated hit(s)

Total CM and HMM hits reported:                                  1

# CPU time: 0.99u 0.10s 00:00:01.09 Elapsed: 00:00:00.45
//
[ok]


In [60]:
import rna_tools.tools.rna_alignment.rna_alignment as ra
a = ra.RNAalignment(fetch="RF02681")# '/Users/magnus/work/sister_twister/RAGATH-3-twister-sister.sto')
a


Out[60]:
SingleLetterAlphabet() alignment with 4 rows and 85 columns
GCAACCCGCAAGGCCGACGCACAAC---GCGCCGCCGGUGCAAG...ACA ADJS01013948.1/250-330
GAAACCCGCUAGGCCGACAGCCUCACCGCUGCCGCUGGUGCAAG...CAG BABG01005008.1/780-696
ACGACCCGCAAGGCCGACGCAUAAC---GCGCCGCCGGUGCAAG...ACA ADJS01013948.1/577-657
AUGACCCGCAAGGCCGACGGCAUCCCG-CCGCCGCUGGUGCAAG...ACA FP929046.1/2708602-2708521

In [61]:
#print a.io.format("fasta")
ra.RChie().plot_cov(a.io.format("fasta"), a.ss_cons_std, verbose=False)


Rchie: plot saved to rchie.png
Out[61]:

In [63]:
a.find_seq("UGCAAGUC")


Out[63]:
SeqRecord(seq=Seq('AUGACCCGCAAGGCCGACGGCAUCCCG-CCGCCGCUGGUGCAAGCCCAGCCGCC...ACA', SingleLetterAlphabet()), id='FP929046.1/2708602-2708521', name='FP929046.1', description='FP929046.1/2708602-2708521', dbxrefs=[])

In [64]:
seq = RNASequence("GCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU")
print rs.cmscan(seq)


('RFAM_DB_PATH', '/home/magnus/work/db/rfam/Rfam.cm')
cmscan -E 1 /home/magnus/work/db/rfam/Rfam.cm /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmp4qzzKI.fa  > /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpm7wPJ8
# cmscan :: search sequence(s) against a CM database
# INFERNAL 1.1.2 (July 2016)
# Copyright (C) 2016 Howard Hughes Medical Institute.
# Freely distributed under a BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file:                   /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmp4qzzKI.fa
# target CM database:                    /home/magnus/work/db/rfam/Rfam.cm
# sequence reporting threshold:          E-value <= 1
# number of worker threads:              4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       target  [L=50]
Hit scores:
 rank     E-value  score  bias  modelname       start    end   mdl trunc   gc  description
 ----   --------- ------ -----  -------------- ------ ------   --- ----- ----  -----------
  (1) !   1.7e-08   42.7   0.0  Twister-sister      1     50 +  cm 5'&3' 0.72  Twister_sister_ribozyme


Hit alignments:
>> Twister-sister  Twister_sister_ribozyme
 rank     E-value  score  bias mdl mdl from   mdl to       seq from      seq to       acc trunc   gc
 ----   --------- ------ ----- --- -------- --------    ----------- -----------      ---- ----- ----
  (1) !   1.7e-08   42.7   0.0  cm       27       79 ~~           1          50 + ~~ 0.97 5'&3' 0.72

                           ???                                    ???      ????      NC
                    ~~~~~~_>>>,,<<<<_________>>>><<<<<<_____>>>>>>)))------))))~~~~~ CS
  Twister-sister  1 <[26]*ccCGCCGCcGGUGCAAGCCCgGCCgCCCcagacagGGGcGGGCGCUCAUGGGU*[5]> 84
                          +CCGCCGC:GGUGCAAG CC:GCC:C:C:    +:G:G:GGGCGCUCAUGGGU     
          target  1 <[ 0]*GCCGCCGCUGGUGCAAGUCCAGCCACGCUU---CGGCGUGGGCGCUCAUGGGU*[0]> 50
                    ......9***************************95...49******************..... PP



Internal HMM-only pipeline statistics summary: (run for model(s) with zero basepairs)
--------------------------------------------------------------------------------------
Query sequence(s):                                               1  (100 residues searched)
Target model(s):                                               347  (40911 consensus positions)
Windows   passing  local HMM SSV           filter:               2  (0.002882); expected (0.02)
Windows   passing  local HMM MSV      bias filter:               2  (0.002882); expected (0.02)
Windows   passing  local HMM Viterbi       filter:               0  (0); expected (0.001)
Windows   passing  local HMM Forward       filter:               0  (0); expected (1e-05)
Total HMM hits reported:                                         0  (0)

Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s):                                               1  (100 residues searched)
Query sequences re-searched for truncated hits:                  1  (297.2 residues re-searched, avg per model)
Target model(s):                                              2669  (345034 consensus positions)
Windows   passing  local HMM SSV           filter:             798  (0.03589); expected (0.35)
Windows   passing  local HMM Viterbi       filter:                  (off)
Windows   passing  local HMM Viterbi  bias filter:                  (off)
Windows   passing  local HMM Forward       filter:             290  (0.01307); expected (0.02)
Windows   passing  local HMM Forward  bias filter:             251  (0.01136); expected (0.02)
Windows   passing glocal HMM Forward       filter:              59  (0.002673); expected (0.02)
Windows   passing glocal HMM Forward  bias filter:              55  (0.002484); expected (0.02)
Envelopes passing glocal HMM envelope defn filter:              54  (0.002259); expected (0.02)
Envelopes passing  local CM  CYK           filter:              14  (0.0005188); expected (0.0001)
Total CM hits reported:                                          1  (9.432e-05); includes 1 truncated hit(s)

Total CM and HMM hits reported:                                  1

# CPU time: 0.95u 0.09s 00:00:01.04 Elapsed: 00:00:00.37
//
[ok]

PDB Blast search


In [65]:
p = BlastPDB(seq.seq)
p.search()
print p.result


<HTML>
<TITLE>BLAST Search Results</TITLE>
<BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">
<PRE>
<b>BLASTN 2.2.18 [Mar-02-2008]</b>


<b><a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids
=9254694&dopt=Citation">Reference</a>:</b>
Altschul, Stephen F., Thomas L. Madden, Alejandro A. Sch&auml;ffer, 
Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
"Gapped BLAST and PSI-BLAST: a new generation of protein database search
programs",  Nucleic Acids Res. 25:3389-3402.

<b>Query=</b> UNKNOWN_SEQUENCE
         (50 letters)

<b>Database:</b> pdb_nucleotide 
           20,601 sequences; 4,025,796 total letters

Searching..................................................done

<PRE>


                                                                 Score    E
Sequences producing significant alignments:                      (bits) Value

5Y87:2:B,D|pdbid|entity|chain(s)|sequence                             <a href = #17652>100</a>   1e-22
5Y85:2:B,D|pdbid|entity|chain(s)|sequence                             <a href = #17650>100</a>   1e-22
2AU4:1:A|pdbid|entity|chain(s)|sequence                               <a href = #3796> 24</a>   6.6  
</PRE>
<PRE>
><a name = 17652></a>5Y87:2:B,D|pdbid|entity|chain(s)|sequence
          Length = 50

 Score = 99.6 bits (50), Expect = 1e-22
 Identities = 50/50 (100%)
 Strand = Plus / Plus

                                                            
Query: 1  gccgccgctggtgcaagtccagccacgcttcggcgtgggcgctcatgggt 50
          ||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct: 1  gccgccgctggtgcaagtccagccacgcttcggcgtgggcgctcatgggt 50
</PRE>


<PRE>
><a name = 17650></a>5Y85:2:B,D|pdbid|entity|chain(s)|sequence
          Length = 50

 Score = 99.6 bits (50), Expect = 1e-22
 Identities = 50/50 (100%)
 Strand = Plus / Plus

                                                            
Query: 1  gccgccgctggtgcaagtccagccacgcttcggcgtgggcgctcatgggt 50
          ||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct: 1  gccgccgctggtgcaagtccagccacgcttcggcgtgggcgctcatgggt 50
</PRE>


<PRE>
><a name = 3796></a>2AU4:1:A|pdbid|entity|chain(s)|sequence
          Length = 41

 Score = 24.3 bits (12), Expect = 6.6
 Identities = 12/12 (100%)
 Strand = Plus / Plus

                      
Query: 26 cgcttcggcgtg 37
          ||||||||||||
Sbjct: 18 cgcttcggcgtg 29
</PRE>


<PRE>
  Database: pdb_nucleotide
    Posted date:  May 31, 2019  2:22 PM
  Number of letters in database: 4,025,796
  Number of sequences in database:  20,601
  
Lambda     K      H
    1.37    0.711     1.31 

Gapped
Lambda     K      H
    1.37    0.711     1.31 


Matrix: blastn matrix:1 -3
Gap Penalties: Existence: 5, Extension: 2
Number of Sequences: 20601
Number of Hits to DB: 2112
Number of extensions: 17
Number of successful extensions: 16
Number of sequences better than 10.0: 3
Number of HSP's gapped: 16
Number of HSP's successfully gapped: 3
Length of query: 50
Length of database: 4,025,796
Length adjustment: 14
Effective length of query: 36
Effective length of database: 3,737,382
Effective search space: 134545752
Effective search space used: 134545752
X1: 10 (19.8 bits)
X2: 15 (29.7 bits)
X3: 50 (99.1 bits)
S1: 10 (20.3 bits)
S2: 12 (24.3 bits)
</PRE>
</BODY>
</HTML>