In [37]:
import nglview
from rna_tools.Seq import RNASequence
from rna_tools.BlastPDB import BlastPDB
%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
In [8]:
seq = RNASequence("ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU")
In [9]:
seq
Out[9]:
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU
In [10]:
print seq.predict_ss()
>rna_seq
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU
(((((..(((((.(((.((..((.(((((.......)))))...))..)).))).))).))..))))) (-32.50)
In [11]:
print seq.predict_ss(method='RNAsubopt')
>rna_seq [100]
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU -32.50 1.00
(((((..(((((.(((.((..((.(((((.......)))))...))..)).))).))).))..))))) -32.50
.((((..(((((.(((.((..((.(((((.......)))))...))..)).))).))).))..)))). -31.90
In [12]:
print seq.predict_ss(method='ipknot', verbose=True)
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU
..((....))(((..((((((...(((((.......)))))(((((....)))))))))))..)))..
In [13]:
print seq.predict_ss(method='contextfold')
(((((..........((((((...(((((.......)))))(((((....)))))))))))..)))))
In [58]:
import rna_tools.RfamSearch as RfamSearch
rs = RfamSearch.RfamSearch()
print rs.cmscan(seq)
('RFAM_DB_PATH', '/home/magnus/work/db/rfam/Rfam.cm')
cmscan -E 1 /home/magnus/work/db/rfam/Rfam.cm /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpCfvTrK.fa > /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmphHDnj0
# cmscan :: search sequence(s) against a CM database
# INFERNAL 1.1.2 (July 2016)
# Copyright (C) 2016 Howard Hughes Medical Institute.
# Freely distributed under a BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file: /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpCfvTrK.fa
# target CM database: /home/magnus/work/db/rfam/Rfam.cm
# sequence reporting threshold: E-value <= 1
# number of worker threads: 4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Query: target [L=50]
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- -------------- ------ ------ --- ----- ---- -----------
(1) ! 1.7e-08 42.7 0.0 Twister-sister 1 50 + cm 5'&3' 0.72 Twister_sister_ribozyme
Hit alignments:
>> Twister-sister Twister_sister_ribozyme
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(1) ! 1.7e-08 42.7 0.0 cm 27 79 ~~ 1 50 + ~~ 0.97 5'&3' 0.72
??? ??? ???? NC
~~~~~~_>>>,,<<<<_________>>>><<<<<<_____>>>>>>)))------))))~~~~~ CS
Twister-sister 1 <[26]*ccCGCCGCcGGUGCAAGCCCgGCCgCCCcagacagGGGcGGGCGCUCAUGGGU*[5]> 84
+CCGCCGC:GGUGCAAG CC:GCC:C:C: +:G:G:GGGCGCUCAUGGGU
target 1 <[ 0]*GCCGCCGCUGGUGCAAGUCCAGCCACGCUU---CGGCGUGGGCGCUCAUGGGU*[0]> 50
......9***************************95...49******************..... PP
Internal HMM-only pipeline statistics summary: (run for model(s) with zero basepairs)
--------------------------------------------------------------------------------------
Query sequence(s): 1 (100 residues searched)
Target model(s): 347 (40911 consensus positions)
Windows passing local HMM SSV filter: 2 (0.002882); expected (0.02)
Windows passing local HMM MSV bias filter: 2 (0.002882); expected (0.02)
Windows passing local HMM Viterbi filter: 0 (0); expected (0.001)
Windows passing local HMM Forward filter: 0 (0); expected (1e-05)
Total HMM hits reported: 0 (0)
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (100 residues searched)
Query sequences re-searched for truncated hits: 1 (297.2 residues re-searched, avg per model)
Target model(s): 2669 (345034 consensus positions)
Windows passing local HMM SSV filter: 798 (0.03589); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 290 (0.01307); expected (0.02)
Windows passing local HMM Forward bias filter: 251 (0.01136); expected (0.02)
Windows passing glocal HMM Forward filter: 59 (0.002673); expected (0.02)
Windows passing glocal HMM Forward bias filter: 55 (0.002484); expected (0.02)
Envelopes passing glocal HMM envelope defn filter: 54 (0.002259); expected (0.02)
Envelopes passing local CM CYK filter: 14 (0.0005188); expected (0.0001)
Total CM hits reported: 1 (9.432e-05); includes 1 truncated hit(s)
Total CM and HMM hits reported: 1
# CPU time: 0.99u 0.10s 00:00:01.09 Elapsed: 00:00:00.45
//
[ok]
In [60]:
import rna_tools.tools.rna_alignment.rna_alignment as ra
a = ra.RNAalignment(fetch="RF02681")# '/Users/magnus/work/sister_twister/RAGATH-3-twister-sister.sto')
a
Out[60]:
SingleLetterAlphabet() alignment with 4 rows and 85 columns
GCAACCCGCAAGGCCGACGCACAAC---GCGCCGCCGGUGCAAG...ACA ADJS01013948.1/250-330
GAAACCCGCUAGGCCGACAGCCUCACCGCUGCCGCUGGUGCAAG...CAG BABG01005008.1/780-696
ACGACCCGCAAGGCCGACGCAUAAC---GCGCCGCCGGUGCAAG...ACA ADJS01013948.1/577-657
AUGACCCGCAAGGCCGACGGCAUCCCG-CCGCCGCUGGUGCAAG...ACA FP929046.1/2708602-2708521
In [61]:
#print a.io.format("fasta")
ra.RChie().plot_cov(a.io.format("fasta"), a.ss_cons_std, verbose=False)
Rchie: plot saved to rchie.png
Out[61]:
In [63]:
a.find_seq("UGCAAGUC")
Out[63]:
SeqRecord(seq=Seq('AUGACCCGCAAGGCCGACGGCAUCCCG-CCGCCGCUGGUGCAAGCCCAGCCGCC...ACA', SingleLetterAlphabet()), id='FP929046.1/2708602-2708521', name='FP929046.1', description='FP929046.1/2708602-2708521', dbxrefs=[])
In [64]:
seq = RNASequence("GCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU")
print rs.cmscan(seq)
('RFAM_DB_PATH', '/home/magnus/work/db/rfam/Rfam.cm')
cmscan -E 1 /home/magnus/work/db/rfam/Rfam.cm /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmp4qzzKI.fa > /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpm7wPJ8
# cmscan :: search sequence(s) against a CM database
# INFERNAL 1.1.2 (July 2016)
# Copyright (C) 2016 Howard Hughes Medical Institute.
# Freely distributed under a BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file: /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmp4qzzKI.fa
# target CM database: /home/magnus/work/db/rfam/Rfam.cm
# sequence reporting threshold: E-value <= 1
# number of worker threads: 4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Query: target [L=50]
Hit scores:
rank E-value score bias modelname start end mdl trunc gc description
---- --------- ------ ----- -------------- ------ ------ --- ----- ---- -----------
(1) ! 1.7e-08 42.7 0.0 Twister-sister 1 50 + cm 5'&3' 0.72 Twister_sister_ribozyme
Hit alignments:
>> Twister-sister Twister_sister_ribozyme
rank E-value score bias mdl mdl from mdl to seq from seq to acc trunc gc
---- --------- ------ ----- --- -------- -------- ----------- ----------- ---- ----- ----
(1) ! 1.7e-08 42.7 0.0 cm 27 79 ~~ 1 50 + ~~ 0.97 5'&3' 0.72
??? ??? ???? NC
~~~~~~_>>>,,<<<<_________>>>><<<<<<_____>>>>>>)))------))))~~~~~ CS
Twister-sister 1 <[26]*ccCGCCGCcGGUGCAAGCCCgGCCgCCCcagacagGGGcGGGCGCUCAUGGGU*[5]> 84
+CCGCCGC:GGUGCAAG CC:GCC:C:C: +:G:G:GGGCGCUCAUGGGU
target 1 <[ 0]*GCCGCCGCUGGUGCAAGUCCAGCCACGCUU---CGGCGUGGGCGCUCAUGGGU*[0]> 50
......9***************************95...49******************..... PP
Internal HMM-only pipeline statistics summary: (run for model(s) with zero basepairs)
--------------------------------------------------------------------------------------
Query sequence(s): 1 (100 residues searched)
Target model(s): 347 (40911 consensus positions)
Windows passing local HMM SSV filter: 2 (0.002882); expected (0.02)
Windows passing local HMM MSV bias filter: 2 (0.002882); expected (0.02)
Windows passing local HMM Viterbi filter: 0 (0); expected (0.001)
Windows passing local HMM Forward filter: 0 (0); expected (1e-05)
Total HMM hits reported: 0 (0)
Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s): 1 (100 residues searched)
Query sequences re-searched for truncated hits: 1 (297.2 residues re-searched, avg per model)
Target model(s): 2669 (345034 consensus positions)
Windows passing local HMM SSV filter: 798 (0.03589); expected (0.35)
Windows passing local HMM Viterbi filter: (off)
Windows passing local HMM Viterbi bias filter: (off)
Windows passing local HMM Forward filter: 290 (0.01307); expected (0.02)
Windows passing local HMM Forward bias filter: 251 (0.01136); expected (0.02)
Windows passing glocal HMM Forward filter: 59 (0.002673); expected (0.02)
Windows passing glocal HMM Forward bias filter: 55 (0.002484); expected (0.02)
Envelopes passing glocal HMM envelope defn filter: 54 (0.002259); expected (0.02)
Envelopes passing local CM CYK filter: 14 (0.0005188); expected (0.0001)
Total CM hits reported: 1 (9.432e-05); includes 1 truncated hit(s)
Total CM and HMM hits reported: 1
# CPU time: 0.95u 0.09s 00:00:01.04 Elapsed: 00:00:00.37
//
[ok]
In [65]:
p = BlastPDB(seq.seq)
p.search()
print p.result
<HTML>
<TITLE>BLAST Search Results</TITLE>
<BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">
<PRE>
<b>BLASTN 2.2.18 [Mar-02-2008]</b>
<b><a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids
=9254694&dopt=Citation">Reference</a>:</b>
Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schäffer,
Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
"Gapped BLAST and PSI-BLAST: a new generation of protein database search
programs", Nucleic Acids Res. 25:3389-3402.
<b>Query=</b> UNKNOWN_SEQUENCE
(50 letters)
<b>Database:</b> pdb_nucleotide
20,601 sequences; 4,025,796 total letters
Searching..................................................done
<PRE>
Score E
Sequences producing significant alignments: (bits) Value
5Y87:2:B,D|pdbid|entity|chain(s)|sequence <a href = #17652>100</a> 1e-22
5Y85:2:B,D|pdbid|entity|chain(s)|sequence <a href = #17650>100</a> 1e-22
2AU4:1:A|pdbid|entity|chain(s)|sequence <a href = #3796> 24</a> 6.6
</PRE>
<PRE>
><a name = 17652></a>5Y87:2:B,D|pdbid|entity|chain(s)|sequence
Length = 50
Score = 99.6 bits (50), Expect = 1e-22
Identities = 50/50 (100%)
Strand = Plus / Plus
Query: 1 gccgccgctggtgcaagtccagccacgcttcggcgtgggcgctcatgggt 50
||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct: 1 gccgccgctggtgcaagtccagccacgcttcggcgtgggcgctcatgggt 50
</PRE>
<PRE>
><a name = 17650></a>5Y85:2:B,D|pdbid|entity|chain(s)|sequence
Length = 50
Score = 99.6 bits (50), Expect = 1e-22
Identities = 50/50 (100%)
Strand = Plus / Plus
Query: 1 gccgccgctggtgcaagtccagccacgcttcggcgtgggcgctcatgggt 50
||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct: 1 gccgccgctggtgcaagtccagccacgcttcggcgtgggcgctcatgggt 50
</PRE>
<PRE>
><a name = 3796></a>2AU4:1:A|pdbid|entity|chain(s)|sequence
Length = 41
Score = 24.3 bits (12), Expect = 6.6
Identities = 12/12 (100%)
Strand = Plus / Plus
Query: 26 cgcttcggcgtg 37
||||||||||||
Sbjct: 18 cgcttcggcgtg 29
</PRE>
<PRE>
Database: pdb_nucleotide
Posted date: May 31, 2019 2:22 PM
Number of letters in database: 4,025,796
Number of sequences in database: 20,601
Lambda K H
1.37 0.711 1.31
Gapped
Lambda K H
1.37 0.711 1.31
Matrix: blastn matrix:1 -3
Gap Penalties: Existence: 5, Extension: 2
Number of Sequences: 20601
Number of Hits to DB: 2112
Number of extensions: 17
Number of successful extensions: 16
Number of sequences better than 10.0: 3
Number of HSP's gapped: 16
Number of HSP's successfully gapped: 3
Length of query: 50
Length of database: 4,025,796
Length adjustment: 14
Effective length of query: 36
Effective length of database: 3,737,382
Effective search space: 134545752
Effective search space used: 134545752
X1: 10 (19.8 bits)
X2: 15 (29.7 bits)
X3: 50 (99.1 bits)
S1: 10 (20.3 bits)
S2: 12 (24.3 bits)
</PRE>
</BODY>
</HTML>
Content source: m4rx9/rna-pdb-tools
Similar notebooks: