In [1]:
%autosave 0
from __future__ import print_function


Autosave disabled

EXAMPLE 7. Search for double-stranded RNA motif in database

We will now search for a double-stranded motif within the crystal structure of the large ribosomal subunit.

This can be performed using the ds_motif function, which is very similar to the ss_motif function. It is necessary to specify the number of nucleotides in the first (l1) and second (l2) strand.
It is possible to specify the maximum number of allowed inserted or bulged residues with the option bulges.
In the example we use an eRMSD threshold of 0.7 - relevant hits have a distance between 0.6-0.9 eRMSD. If you specify the optional keyword out, PDB structures are written to disk.
By default the search is performed without considering the sequence. It is possible to specify a sequence with the sequence option. Abbreviations (i.e. N/R/Y) are accepted.


In [2]:
import barnaba as bb

# Locate every occurrence of the SARCIN motif in the large ribosomal
# subunit of H. marismortui (PDB 1S72).
query = "../test/data/SARCIN.pdb"
pdb = "../test/data/1S72.pdb"

# Double-stranded search: l1 and l2 are the number of nucleotides in the
# first and second strand, bulges is the maximum number of inserted/bulged
# residues allowed, threshold is the eRMSD cutoff, and passing `out` makes
# the function write the matching structures to disk as PDB files.
results = bb.ds_motif(
    query,
    pdb,
    l1=8,
    l2=7,
    bulges=0,
    threshold=0.7,
    out='sarcin_motif',
)


# Loaded query ../test/data/SARCIN.pdb 
# Loaded target ../test/data/1S72.pdb 
# Treating nucleotide 1MA628 as A 
# Treating nucleotide OMU2587 as U 
# Treating nucleotide OMG2588 as G 
# Treating nucleotide UR32619 as U 
# Treating nucleotide PSU2621 as U 

Now we print distances and sequences


In [3]:
import glob

# Collect the PDB files written to disk by the motif search.
# glob.glob returns its matches in an arbitrary, filesystem-dependent order,
# so the list is sorted to make positional access such as pdbs[1] in the
# following cells reproducible across runs and machines.
pdbs = sorted(glob.glob("sarcin*.pdb"))

# Each entry of `results` is indexed positionally: element 1 is printed as the
# eRMSD of the hit and element 2 holds the residue labels printed as sequence.
for j, hit in enumerate(results):
    seq = ",".join(hit[2])
    print("%2d eRMSD:%5.3f" % (j, hit[1]))
    print("     Sequence: %s" % seq)


 0 eRMSD:0.394
     Sequence: C_171_0,U_172_0,C_173_0,A_174_0,G_175_0,U_176_0,A_177_0,U_178_0,A_158_0,G_159_0,A_160_0,A_161_0,C_162_0,U_163_0,G_164_0
 1 eRMSD:0.511
     Sequence: G_209_0,U_210_0,U_211_0,A_212_0,G_213_0,U_214_0,A_215_0,A_216_0,U_224_0,G_225_0,A_226_0,A_227_0,C_228_0,G_229_0,C_230_0
 2 eRMSD:0.602
     Sequence: A_354_0,C_355_0,C_356_0,A_357_0,G_358_0,U_359_0,A_360_0,C_361_0,C_291_0,G_292_0,A_293_0,C_294_0,C_295_0,G_296_0,U_297_0
 3 eRMSD:0.599
     Sequence: U_584_0,C_585_0,C_586_0,A_587_0,G_588_0,U_589_0,A_590_0,A_591_0,U_567_0,G_568_0,A_569_0,C_570_0,C_571_0,G_572_0,A_573_0
 4 eRMSD:0.572
     Sequence: C_1366_0,A_1367_0,U_1368_0,A_1369_0,G_1370_0,U_1371_0,A_1372_0,G_1373_0,U_2052_0,G_2053_0,A_2054_0,A_2055_0,C_2056_0,U_2057_0,G_2058_0
 5 eRMSD:0.481
     Sequence: U_2688_0,A_2689_0,U_2690_0,A_2691_0,G_2692_0,U_2693_0,A_2694_0,C_2695_0,G_2700_0,G_2701_0,A_2702_0,A_2703_0,C_2704_0,U_2705_0,A_2706_0
 6 eRMSD:0.536
     Sequence: G_74_1,G_75_1,G_76_1,A_77_1,G_78_1,U_79_1,A_80_1,C_81_1,G_101_1,G_102_1,A_103_1,A_104_1,A_105_1,C_106_1,C_107_1

Finally, we visualize the query next to one of the hits (the file stored at index 1 of pdbs)


In [4]:
import py3Dmol

# Read both structures as plain text. The context managers close the file
# handles deterministically instead of leaving them open until garbage
# collection.
with open(query, 'r') as handle:
    query_s = handle.read()
with open(pdbs[1], 'r') as handle:
    hit_1 = handle.read()

# One viewer with a 1x2 grid: the query goes into cell (0,0) and the hit
# into cell (0,1), so the two structures can be compared visually.
p = py3Dmol.view(width=900,height=600,viewergrid=(1,2))
p.addModel(query_s,'pdb',viewer=(0,0))
p.addModel(hit_1,'pdb',viewer=(0,1))
p.setStyle({'stick':{}})
p.setBackgroundColor('0xeeeeee')
p.zoomTo()
p.show()


Out[4]:

In [5]:
def print_base_pairs(pairings, residues):
    """Print one line per annotated base pair.

    Each line contains the labels of the two paired residues followed by the
    pair type, separated by single spaces.

    Parameters
    ----------
    pairings : sequence
        Pairing annotation as returned by ``bb.annotate``. Only ``pairings[0]``
        is read (presumably the first frame/model - TODO confirm):
        ``pairings[0][0]`` holds the residue index pairs and ``pairings[0][1]``
        the matching pair-type labels.
    residues : sequence
        Residue labels, indexed by the residue indices stored in ``pairings``.
    """
    pair_indices = pairings[0][0]
    pair_types = pairings[0][1]
    # Loop variables live inside the function, so the notebook-level py3Dmol
    # view `p` is no longer overwritten by a loop counter.
    for k, pair in enumerate(pair_indices):
        print(residues[pair[0]], residues[pair[1]], pair_types[k])


# annotate the query (native) structure
stackings_query, pairings_query, res_query = bb.annotate(query)
print("Query BASE-PAIR")
print_base_pairs(pairings_query, res_query)

print()

# annotate the hit stored at index 1 of pdbs
stackings_hit, pairings_hit, res_hit = bb.annotate(pdbs[1])

print("Hit 2 base-pairs")
print_base_pairs(pairings_hit, res_hit)

print()


Query BASE-PAIR
C_6_0 G_24_0 WCc
U_7_0 C_23_0 WHc
C_8_0 C_22_0 SHt
A_9_0 A_21_0 HHt
G_10_0 U_11_0 SHc
U_11_0 A_20_0 WHt
A_12_0 G_19_0 HSc
U_13_0 A_18_0 WCc

Hit 2 base-pairs
G_209_0 C_230_0 WCc
U_210_0 G_229_0 GUc
U_211_0 C_228_0 SHt
A_212_0 A_227_0 HHt
G_213_0 U_214_0 SHc
U_214_0 A_226_0 WHt
A_215_0 G_225_0 HSc
A_216_0 U_224_0 WCc

# Loading ../test/data/SARCIN.pdb 
# Loading sarcin_motif_00002_U_0.pdb