2.1 Comparing two similar models

2.2 Comparing many similar models in single run

3 mcq-cli

3.1 Local

4 rnaqua: A tool supporting quality assessment of RNA 3D structures

BasicAssessMetrics



In [57]:

    
%%bash
curr_path = `pwd`
cd opt
git clone https://github.com/RNA-Puzzles/BasicAssessMetrics.git









    



Cloning into 'BasicAssessMetrics'...



In [60]:









    Out[60]:





u'/Users/magnus/work-src/rna-pdb-tools/opt'



In [61]:

    
import sys
import os

import pdb_utils
import utils
import extract

from operator import attrgetter

RESIDUES_LIST = "data/residues.list"
ATOMS_LIST = "data/atoms.list"

def CleanFormat(f):
    """
    CleanFormat is a function used to format different platform formats to unix. Users need to install dos2unix
    """
    os.system( "mac2unix -q %s" %f )
    os.system( "dos2unix -q %s" %f )



In [62]:

    
def normalize_structure(struct, out_file = None, index_file=None, extract_file = None):
    pdb_normalizer = pdb_utils.PDBNormalizer( RESIDUES_LIST, ATOMS_LIST )
    ok = pdb_normalizer.parse( struct, out_file )
    if not ok:
        sys.stderr.write("ERROR: structure not normalized!\n")
    else:
        sys.stderr.write("INFO: Normalization succeded!\n")
    if not extract_file is None:
        coords=open(index_file).read()
        extract.extract_PDB(SOLUTION_NORMAL,coords, extract_file)
        sys.stderr.write("INFO:	structure extracted\n")



In [63]:

    
# PVALUE set according to Hajdin et al., RNA (7) 16, 2010, either "+" or "-"
def calc_RMSD(native_file, native_index, prediction_file, prediction_index, PVALUE = "-"):
    res_struct = pdb_utils.PDBStruct()
    res_struct.load( native_file, native_index )
    res_raw_seq = res_struct.raw_sequence()

    sol_struct = pdb_utils.PDBStruct()
    sol_struct.load( prediction_file, prediction_index )
    sol_raw_seq = sol_struct.raw_sequence()

    if( sol_raw_seq != res_raw_seq ):
        sys.stderr.write("ERROR Result sequence != Solution sequence!\n")
        sys.stderr.write("DATA Solution sequence --> '%s'\n" %sol_raw_seq )
        sys.stderr.write("DATA Result sequence   --> '%s'\n" %res_raw_seq )
        return(-1)
    # computes the RMSD
    comparer = pdb_utils.PDBComparer()
    rmsd = comparer.rmsd( sol_struct, res_struct )
    sys.stderr.write("INFO Partial RMSD --> %f\n" %rmsd )
    pvalue = comparer.pvalue( rmsd, len(sol_raw_seq), PVALUE )
    sys.stderr.write("INFO Partial P-Value --> %e\n" %pvalue )
    return(rmsd, pvalue)



In [10]:

    
def InteractionNetworkFidelity(native_file, native_index, prediction_file, prediction_index):
    res_struct = pdb_utils.PDBStruct()
    res_struct.load( native_file, native_index )
    res_raw_seq = res_struct.raw_sequence()

    sol_struct = pdb_utils.PDBStruct()
    sol_struct.load( prediction_file, prediction_index )
    sol_raw_seq = sol_struct.raw_sequence()

    if( sol_raw_seq != res_raw_seq ):
        sys.stderr.write("ERROR Result sequence != Solution sequence!\n")
        sys.stderr.write("DATA Solution sequence --> '%s'\n" %sol_raw_seq )
        sys.stderr.write("DATA Result sequence	 --> '%s'\n" %res_raw_seq )
        return(-1)
    # computes the RMSD
    comparer = pdb_utils.PDBComparer()
    rmsd = comparer.rmsd( sol_struct, res_struct )
    INF_ALL = comparer.INF( sol_struct, res_struct, type="ALL" )
    DI_ALL = rmsd / INF_ALL
    INF_WC = comparer.INF( sol_struct, res_struct, type="PAIR_2D" )
    INF_NWC = comparer.INF( sol_struct, res_struct, type="PAIR_3D" )
    INF_STACK = comparer.INF( sol_struct, res_struct, type="STACK" )
    return (rmsd,DI_ALL, INF_ALL, INF_WC, INF_NWC,INF_STACK)



In [41]:

    
# Normalize PDB format, correct residue names and atom names. 
normalize_structure('example/14_solution_0.pdb','example/14_solution_normalized.pdb')

# calculate RMSD for RNA structures
# require biopython
rmsd, pvalue = calc_RMSD("example/14_solution_0.pdb",
        "example/14_solution_0.index",
        "example/14_ChenPostExp_2.pdb",
        "example/14_ChenPostExp_2.index")

print '14_ChenPostExp_2'
print '  RMSD:', rmsd
print '  pvalue:', pvalue

# calculate InteractionNetworkFidelity and Deformation Index for RNA structures
# need to have MA-annotate in the directory or set in mcannotate.py
rmsd, DI_ALL, INF_ALL, INF_WC, INF_NWC,INF_STACK = InteractionNetworkFidelity("example/14_solution_0.pdb",
          "example/14_solution_0.index",
          "example/14_ChenPostExp_2.pdb",
          "example/14_ChenPostExp_2.index")

print '14_ChenPostExp_2, rmsd', rmsd
print "  DI_ALL:", DI_ALL
print "  INF_ALL:", INF_ALL









    



INFO: Normalization succeded!
WARNING    >> Atom P from residue (' ', 1, ' ') not found in target atom list
WARNING    >> Atom OP1 from residue (' ', 1, ' ') not found in target atom list
WARNING    >> Atom OP2 from residue (' ', 1, ' ') not found in target atom list
INFO Partial RMSD --> 7.751173
INFO Partial P-Value --> 7.327472e-15
WARNING    >> Atom P from residue (' ', 1, ' ') not found in target atom list
WARNING    >> Atom OP1 from residue (' ', 1, ' ') not found in target atom list
WARNING    >> Atom OP2 from residue (' ', 1, ' ') not found in target atom list






    



14_ChenPostExp_2
  RMSD: 7.751173243045827
  pvalue: 7.327471962526033e-15
14_ChenPostExp_2, rmsd 7.751173243045827
  DI_ALL: 10.643784178530254
  INF_ALL: 0.72823472489

Deformation Profile

Read DeformationProfile Manual.pdf for more details.



In [91]:

    
%%bash
git clone https://github.com/RNA-Puzzles/DeformationProfile.git









    



Cloning into 'DeformationProfile'...

Comparing two similar models



In [92]:

    
cd DeformationProfile/examples/ex1
cd /Users/magnus/work-src/rna-pdb-tools/opt/DeformationProfile









    



  File "<ipython-input-92-aaa236dd1b63>", line 1
    cd DeformationProfile/examples/ex1
                        ^
SyntaxError: invalid syntax



In [93]:

    
%%bash
python dp.py examples/ex1/a.pdb examples/ex1/b.pdb









    



- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
reference:    a.pdb
comparing:    b.pdb

ref id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 
cmp id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 

ref chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
cmp chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
ref residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
cmp residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
align. index:   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 






    



opening reference file: 'examples/ex1/a.pdb'
opening comparing file: 'examples/ex1/b.pdb'
comparing models...
step: 0 of 19step: 1 of 19step: 2 of 19step: 3 of 19step: 4 of 19step: 5 of 19step: 6 of 19step: 7 of 19step: 8 of 19step: 9 of 19step: 10 of 19step: 11 of 19step: 12 of 19step: 13 of 19step: 14 of 19step: 15 of 19step: 16 of 19step: 17 of 19step: 18 of 19step: 19 of 19done
saving data file...
saving svg file...



In [94]:

    
%%bash
head -n 15 examples/ex1/b.dat









    



#DP 1.0
REF_PDB	examples/ex1/a.pdb
REF_MODEL	0
REF_MODEL_SEQUENCE	GGACUAGCGGAGGCUAGUCC
REF_MODEL_RESIDUES	(B:201:'G  ')(B:202:'G  ')(B:203:'A  ')(B:204:'C  ')(B:205:'U  ')(B:206:'A  ')(B:207:'G  ')(B:208:'C  ')(B:209:'G  ')(B:210:'G  ')(B:211:'A  ')(B:212:'G  ')(B:213:'G  ')(B:214:'C  ')(B:215:'U  ')(B:216:'A  ')(B:217:'G  ')(B:218:'U  ')(B:219:'C  ')(B:220:'C  ')
CMP_PDB	examples/ex1/b.pdb
CMP_MODEL	0
CMP_MODEL_SEQUENCE	GGACUAGCGGAGGCUAGUCC
CMP_MODEL_RESIDUES	(B:201:'G  ')(B:202:'G  ')(B:203:'A  ')(B:204:'C  ')(B:205:'U  ')(B:206:'A  ')(B:207:'G  ')(B:208:'C  ')(B:209:'G  ')(B:210:'G  ')(B:211:'A  ')(B:212:'G  ')(B:213:'G  ')(B:214:'C  ')(B:215:'U  ')(B:216:'A  ')(B:217:'G  ')(B:218:'U  ')(B:219:'C  ')(B:220:'C  ')
LOCAL_RMSD	0.455	0.170	0.093	0.386	0.876	0.140	0.162	0.361	0.782	1.005	0.132	0.488	0.977	0.388	0.338	0.484	0.115	0.164	0.106	0.072
ROW_MEANS	3.090	2.476	2.573	2.873	2.802	1.524	1.490	1.833	4.401	3.858	2.353	2.725	6.356	2.263	1.805	2.301	2.531	2.065	2.437	3.917
COL_MEANS	3.518	3.125	2.749	2.337	1.941	1.827	1.979	2.699	3.295	4.629	4.076	3.247	3.234	2.218	1.947	1.768	1.826	2.388	3.021	3.851
ROW_0	0.275	0.775	1.311	1.894	2.631	2.695	2.894	4.142	5.199	6.791	6.582	5.425	5.092	3.335	2.708	2.299	2.138	2.176	1.817	1.619
ROW_1	0.598	0.146	0.228	0.514	0.817	1.478	2.389	4.116	5.210	7.493	6.986	5.288	5.262	3.322	2.458	1.217	0.537	0.260	0.297	0.897
ROW_2	0.784	0.280	0.079	0.410	0.925	1.524	2.457	4.141	5.234	7.380	7.018	5.348	5.437	3.579	2.793	1.595	0.786	0.505	0.314	0.882

Comparing many similar models in single run



In [103]:

    
%%bash -s "/Users/magnus/work-src/rna-pdb-tools/opt/DeformationProfile/examples/ex2"
cd $1 
mkdir out
python ../../dp.py -c ex2a.cfg









    



- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
reference:    a.pdb
comparing:    b1.pdb

ref id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 
cmp id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 

ref chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
cmp chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
ref residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
cmp residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
align. index:   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
reference:    a.pdb
comparing:    b2.pdb

ref id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 
cmp id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 

ref chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
cmp chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
ref residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
cmp residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
align. index:   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
reference:    a.pdb
comparing:    b3.pdb

ref id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 
cmp id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 

ref chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
cmp chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
ref residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
cmp residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
align. index:   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 






    



mkdir: out: File exists
opening reference file: './ref/a.pdb'
opening comparing file: './cmp/b1.pdb'
comparing models...
step: 0 of 19step: 1 of 19step: 2 of 19step: 3 of 19step: 4 of 19step: 5 of 19step: 6 of 19step: 7 of 19step: 8 of 19step: 9 of 19step: 10 of 19step: 11 of 19step: 12 of 19step: 13 of 19step: 14 of 19step: 15 of 19step: 16 of 19step: 17 of 19step: 18 of 19step: 19 of 19done
saving data file...
saving svg file...
opening comparing file: './cmp/b2.pdb'
comparing models...
step: 0 of 19step: 1 of 19step: 2 of 19step: 3 of 19step: 4 of 19step: 5 of 19step: 6 of 19step: 7 of 19step: 8 of 19step: 9 of 19step: 10 of 19step: 11 of 19step: 12 of 19step: 13 of 19step: 14 of 19step: 15 of 19step: 16 of 19step: 17 of 19step: 18 of 19step: 19 of 19done
saving data file...
saving svg file...
opening comparing file: './cmp/b3.pdb'
comparing models...
step: 0 of 19step: 1 of 19step: 2 of 19step: 3 of 19step: 4 of 19step: 5 of 19step: 6 of 19step: 7 of 19step: 8 of 19step: 9 of 19step: 10 of 19step: 11 of 19step: 12 of 19step: 13 of 19step: 14 of 19step: 15 of 19step: 16 of 19step: 17 of 19step: 18 of 19step: 19 of 19done
saving data file...
saving svg file...



In [105]:

    
%%bash -s "/Users/magnus/work-src/rna-pdb-tools/opt/DeformationProfile/examples/ex3"
cd $1 
mkdir out
python ../../dp.py -c ex3.cfg









    



- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
reference:    a.pdb
comparing:    b.pdb

ref id:       201 202 203 204 205 206 207 208 209   0   1   2   3   4   5   6   7   8   9  10 
cmp id:         1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 

ref chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
cmp chain:      A   A   A   A   A   A   A   A   A   A   A   A   A   A   A   A   A   A   A   A 
ref residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
cmp residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
align. index:   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 






    



mkdir: out: File exists
opening reference file: 'a.pdb'
opening comparing file: 'b.pdb'
comparing models...
step: 0 of 19step: 1 of 19step: 2 of 19step: 3 of 19step: 4 of 19step: 5 of 19step: 6 of 19step: 7 of 19step: 8 of 19step: 9 of 19step: 10 of 19step: 11 of 19step: 12 of 19step: 13 of 19step: 14 of 19step: 15 of 19step: 16 of 19step: 17 of 19step: 18 of 19step: 19 of 19done
saving data file...
saving svg file...

mcq-cli

MCQ (Mean of Circular Quantities) has been first presented in the paper “MCQ4Structures to compute similarity of molecule structures” by Zok et al. (Central European Journal of Operations Research, 2014;22(3):457-474. doi: 10.1007/s10100-013-0296-5).

LCS-TA (Longest Continuous Segments in Torsion Angle space) applies a measure first described in the paper “LCS-TA to identify similar fragments in RNA 3D structures” by Wiedemann et al. (BMC Bioinformatics, 2017;18(1):456. doi: 10.1186/s12859-017-1867-6). A full implementation of both methods can be found on github, maintained by Zok and Wiedemann.

Most important features of both methods / measures:

MCQ
- The method applies to a pair of 3D structures and is size independent.
- It can be used for a set of structures in all-against-all or all-against-target mode.
- It translates typical algebraic representation of a 3D structure into the trigonometric one (a set of torsion angles).
- It computes the distance between structures in torsion angle space.
- The distance is measured as mean of local distances between the corresponding angles, and provided in degrees.
- The measure is sequence independent.
LCS-TA
- The method applies to a pair of 3D structures and is size independent.
- It uses MCQ-based measure for structure comparison.
- Within the compared structures, it finds the longest continuous segments which display similarity in torsion angle space.
- Two segments are considered similar if their MCQ is below predefined threshold.
- The method provides segment length and its position in the structure.
- The length of the longest continuous segment is a measure of similarity of two structures.
- The method can be run in sequence dependent or sequence independent mode.

MCQ is maintained by Zok, while LCS-TA is maintained by Wiedemann

Copy some demo files to your Docker image.



In [25]:

    
%%bash 

docker cp rna_pdb_tools/input/rp18 306468777bc5:/home/demo
echo 'See if the file are at the Docker image:'
#docker exec -i 306468777bc5 /bin/ls -l .
echo 'Run mcq'
echo 'done'









    



See if the file are at the Docker image:
Run mcq
total 36
drwxr-xr-x 2 demo demo 4096 Oct 28 20:43 hsperfdata_demo
drwxr-xr-x 7 demo demo 4096 Oct 28 20:43 99228ed0-d1af-4b60-9903-da360aa8ce1c
drwxr-xr-x 7 demo demo 4096 Oct 28 20:42 14001637-0e41-444a-af71-255c38fbf006
drwxr-xr-x 7 demo demo 4096 Oct 28 20:41 42d358d6-f908-488f-b288-4dd52c1dce59
drwxr-xr-x 7 demo demo 4096 Oct 28 20:40 87997101-cc0f-4a7e-8b6e-bc5b81702a3a
drwxr-xr-x 7 demo demo 4096 Oct 28 20:40 04390fe6-a085-4240-a4e3-d90e7ab9053c
drwxr-xr-x 7 demo demo 4096 Oct 28 20:37 0010b609-890c-48ff-a925-a9fd7b7d3cd0
drwxr-xr-x 7 demo demo 4096 Oct 28 20:33 a1c796dc-2003-4ba9-91fa-dd7b909bdf48
drwxr-xr-x 7 demo demo 4096 Oct 28 20:21 50e0fa17-d097-4a06-9694-f19146192e3e

Local



In [24]:

    
%%bash
docker exec -i 306468777bc5 sh -c "/home/demo/rna-puzzles-toolkit/mcq-cli/local -m /home/demo/rp18/18_Rh* -t /home/demo/rp18/18_0_solution_5TPY_rpr.pdb"









    



Results are available in: /tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c



In [26]:

    
%%bash
docker exec -i 306468777bc5 sh -c "ls -lt /tmp"









    



total 36
drwxr-xr-x 2 demo demo 4096 Oct 28 20:43 hsperfdata_demo
drwxr-xr-x 7 demo demo 4096 Oct 28 20:43 99228ed0-d1af-4b60-9903-da360aa8ce1c
drwxr-xr-x 7 demo demo 4096 Oct 28 20:42 14001637-0e41-444a-af71-255c38fbf006
drwxr-xr-x 7 demo demo 4096 Oct 28 20:41 42d358d6-f908-488f-b288-4dd52c1dce59
drwxr-xr-x 7 demo demo 4096 Oct 28 20:40 87997101-cc0f-4a7e-8b6e-bc5b81702a3a
drwxr-xr-x 7 demo demo 4096 Oct 28 20:40 04390fe6-a085-4240-a4e3-d90e7ab9053c
drwxr-xr-x 7 demo demo 4096 Oct 28 20:37 0010b609-890c-48ff-a925-a9fd7b7d3cd0
drwxr-xr-x 7 demo demo 4096 Oct 28 20:33 a1c796dc-2003-4ba9-91fa-dd7b909bdf48
drwxr-xr-x 7 demo demo 4096 Oct 28 20:21 50e0fa17-d097-4a06-9694-f19146192e3e

Generate a plot based on the table.



In [48]:

    
%%bash
docker exec -i 306468777bc5 sh -c 'cd /tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c/ && ~/rna-puzzles-toolkit/mcq-cli/colorbars.R table.csv'









    



null device 
          1



In [49]:

    
%%bash
docker exec -i 306468777bc5 sh -c "ls /tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c/"









    



18_Rhiju_1_rpr.pdb
18_Rhiju_2_rpr.pdb
18_Rhiju_3_rpr.pdb
18_Rhiju_4_rpr.pdb
18_Rhiju_5_rpr.pdb
colorbars.pdf
table.csv

Copy the results to your local temp.



In [50]:

    
%%bash
docker cp 306468777bc5:/tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c /tmp/
ls /tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c









    



18_Rhiju_1_rpr.pdb
18_Rhiju_2_rpr.pdb
18_Rhiju_3_rpr.pdb
18_Rhiju_4_rpr.pdb
18_Rhiju_5_rpr.pdb
colorbars.pdf
table.csv

View the results using Pandas.



In [51]:

    
import pandas as pd
table = pd.read_csv('/tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c/table.csv')
table









    Out[51]:






  
    
      
      Unnamed: 0
      A.G1
      A.G2
      A.G3
      A.U4
      A.C5
      A.A6
      A.G7
      A.G8
      A.C9
      ...
      A.C62
      A.G63
      A.A64
      A.A65
      A.A66
      A.G67
      A.U68
      A.G69
      A.G70
      A.G71
    
  
  
    
      0
      NaN
      .
      .
      {
      .
      (
      (
      (
      (
      (
      ...
      (
      .
      .
      .
      .
      )
      )
      )
      )
      )
    
    
      1
      18_Rhiju_1_rpr.pdb
      47.82113725714628
      94.33688052900935
      31.39543325853087
      18.076285941192126
      12.151729157844537
      8.501493434485806
      8.598837669174692
      42.59522171589118
      20.87230553973775
      ...
      5.670652702133741
      10.452237697487197
      10.899236647164763
      20.403828903308945
      33.634631786172775
      40.07917778882871
      2.8105157237558216
      3.12268503958415
      3.38706035222638
      5.593981802971269
    
    
      2
      18_Rhiju_2_rpr.pdb
      47.814634236400245
      94.36427399419372
      31.370981142647096
      18.089484899307735
      12.147480701577837
      8.650831027408056
      8.63040256878347
      42.56551078528806
      20.851341100772423
      ...
      5.677884657321962
      10.452737639486374
      10.890558433434231
      20.403397216819638
      33.620206985666044
      40.080335425773846
      2.817183422703433
      3.126361110146106
      3.3941886507573806
      5.589145358542448
    
    
      3
      18_Rhiju_3_rpr.pdb
      47.81641584104296
      94.33716713819841
      31.381629086411603
      18.09762029066829
      12.488499179888773
      7.761124375253679
      8.446983647092896
      42.5977909315378
      20.874485775991467
      ...
      5.693305680051093
      10.461824200944507
      10.897447127771594
      20.391915532271337
      33.63770508870173
      40.08087253048741
      2.804217011141902
      3.1233132994034736
      3.0792003889587933
      4.145803549416084
    
    
      4
      18_Rhiju_4_rpr.pdb
      47.8123326942324
      94.3496976110549
      31.38130853181125
      18.09916815587993
      12.154305729867458
      8.057636320487854
      8.431397105940157
      42.586465159687144
      20.87291438951094
      ...
      5.670652702133741
      10.452237697487197
      10.899236647164763
      20.403828903308945
      33.634631786172775
      40.07917778882871
      2.8105157237558216
      3.12268503958415
      3.38706035222638
      5.593981802971269
    
    
      5
      18_Rhiju_5_rpr.pdb
      30.244337414416037
      49.040831717487315
      38.528085614747795
      24.496042938467244
      28.577976739037847
      17.340345024766222
      8.424235985713661
      42.56798606775238
      20.870294429470505
      ...
      15.110266980984632
      30.30188070737533
      12.54596966689406
      15.542425288298308
      13.00847949411069
      43.67364096067794
      3.053039560544712
      6.671760440953921
      46.73693054868335
      27.501814192468878
    
  

6 rows × 72 columns



In [52]:

    
%%bash
cp /tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c/18_Rhiju_1_rpr.pdb/delta.svg detla.svg

rnaqua: A tool supporting quality assessment of RNA 3D structures

RNAQUA (RNA QUality Assessment) is a web service based wrapper of basic RNA comparison metrics. It is a RESTful web service client developed in Java. The tool provides a set of web services initially designed for RNAssess (Lukasiak et al., Nucleic Acids Research, 2015;43(W1):W502-W506. doi:10.1093/nar/gkv557) to support the quality assessment of RNA 3D structures. RNAQUA requires a stable release of JRE 7 (or later) installed on a user workstation with an internet access. Full implementation of the tool is stored on github and maintained by Antczak. The binaries and use cases can be downloaded from here. Most important features of RNAQUA:

At the input, a user provides RNA 3D structure(s) in PDB format.
Output data are returned in XML format.
RNAQUA allows to compute the following measures:
- ClashScore,
- Root-mean-square deviation (RMSD),
- Interaction network fidelity (INF),
- Deformation index (DI),
- P-value (either for entire RNA 3D structure(s) or for a set of discontinuous 3D substructures).
Two processing modes are available:
- An analysis of single RNA 3D structure (PDB validation, ClashScore, sequence-based analysis, structure unification),
- An analysis of RNA 3D model(s) with respect to the reference structure (RMSD, INF, DI, P-value, Deformation profile, sequence-based differences between structures, multiple models over the reference structure superposition).
Optionally, the user can define alignment between the reference structure and all corresponding RNA 3D model(s) which is helpful if there are differences in sequence, distribution of chains or residue numbering.

This metric tool is maintained by Antczak.

DEMO

Run rnaqua using Docker.



In [6]:

    
%%bash 
docker exec -i rnapuzzles_toolkit_1 /home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh -h









    



usage: rnaqua
 -c,--command <arg>   supported commands: PDB-VALIDATION, DEFORMATION-PROFILE, CLASH-SCORE, ROOT-MEAN-SQUARE-DEVIATION, ALL-INTERACTION-NETWORK-FIDELITY-SCORES-AT-ONCE,
                      INTERACTION-NETWORK-FIDELITY-WATSON-CRICK, INTERACTION-NETWORK-FIDELITY-NON-WATSON-CRICK, INTERACTION-NETWORK-FIDELITY-STACKING, INTERACTION-NETWORK-FIDELITY-ALL, P-VALUE,
                      DEFORMATION-INDEX, ALL-SCORES-AT-ONCE, SEQUENCE, FRAGMENT, ORIGINAL-3D, RENUMERATED-3D

PDB-VALIDATION



In [20]:

    
%%bash 
docker exec -i rnapuzzles_toolkit_1 /home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh \
--command PDB-VALIDATION \
--multiple-models-directory-path rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/PDB-VALIDATION/models/ \
--output-file-path /tmp/validation.xml
docker exec -i rnapuzzles_toolkit_1 cat /tmp/validation.xml









    



13 Oct 2018 17:54:20 : Command: PDB-VALIDATION
Base pairs identification tool: MC-ANNOTATE
Consider atoms supported by RNA-Puzzles only: N
Output file path: /tmp/validation.xml
Multiple PDB models directory path: rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/PDB-VALIDATION/models/
13 Oct 2018 17:54:21 : Command processed properly



In [21]:

    
%%bash
docker exec -i rnapuzzles_toolkit_1 cat /tmp/validation.xml









    



<?xml version="1.0" encoding="UTF-8" standalone="yes"?><descriptions><basics><filename>model.pdb</filename><errors><error>No. 5: Omitted record. Atom serial number is not integer (   A5). Residue sequence number is not integer ( 2A1). Unrecognized residue name (Z). X coordinate is not floating-point number (   2.A25). Y coordinate is not floating-point number ( -2A.462). Z coordinate is not floating-point number (  -3.A25).</error></errors></basics></descriptions>



In [25]:

    
%%bash
docker exec -i rnapuzzles_toolkit_1 /home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh \
--command PDB-VALIDATION \
--single-model-file-path rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/PDB-VALIDATION/models/model.pdb \
--output-file-path /tmp/validation.xml









    



13 Oct 2018 17:59:12 : Command: PDB-VALIDATION
Base pairs identification tool: MC-ANNOTATE
Consider atoms supported by RNA-Puzzles only: N
Output file path: /tmp/validation.xml
Single model PDB file path: rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/PDB-VALIDATION/models/model.pdb
13 Oct 2018 17:59:12 : Command processed properly

Clash Score

14_ChenPostExp_1_rpr.pdb:U_1,31|U_33,29



In [6]:

    
%%bash
CURRENT_DIR="rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/SCORES/CLASH-SCORE"
docker exec -i rnapuzzles_toolkit_1 /home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh --command CLASH-SCORE --single-model-file-path \
"${CURRENT_DIR}/incontinuous-models/14_ChenPostExp_1_rpr.pdb" \
--alignment "14_ChenPostExp_1_rpr.pdb:U_1,31|U_33,29" --output-file-path /tmp/14_ChenPostExp_1_rpr.xml









    



14 Oct 2018 17:38:19 : Command: CLASH-SCORE
Alignment: 14_ChenPostExp_1_rpr.pdb:U_1_ ,31|U_33_ ,29
Base pairs identification tool: MC-ANNOTATE
Consider atoms supported by RNA-Puzzles only: N
Output file path: /tmp/14_ChenPostExp_1_rpr.xml
Single model PDB file path: rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/SCORES/CLASH-SCORE/incontinuous-models/14_ChenPostExp_1_rpr.pdb
14 Oct 2018 17:38:30 : Command processed properly



In [7]:

    
%%bash
CURRENT_DIR="rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/SCORES/CLASH-SCORE"
docker exec -i rnapuzzles_toolkit_1 cat /tmp/14_ChenPostExp_1_rpr.xml
#
#<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
#<measureScores><structure><description><filename>14_ChenPostExp_1_rpr.pdb</filename><errors/></description><score>2.06</score></structure></measureScores>
#









    



<?xml version="1.0" encoding="UTF-8" standalone="yes"?><measureScores><structure><description><filename>14_ChenPostExp_1_rpr.pdb</filename><errors/></description><score>2.06</score></structure></measureScores>

Structure 3D

ORIGINAL-3D and RENUMERATED-3D

An extraction (ORIGINAL-3D) and unification (RENUMERATED-3D) of RNA 3D model(s) or a set of incontinuous 3D substructures specified by the user which are additionally superimposed over the corresponding 3D structure/substructures of the reference. At the output, ZIP archive including the coordinates of the reference structure as well as all considered RNA 3D model(s) is returned.

Alignment

To ensure robustness of quality assessment process a user can specify the appropriate alignment (-a,--alignment) between the reference 3D structure and all analyzed RNA 3D model(s) which often differ slightly in sequence, distribution of chains or numbering of residues.

An example of alignment prepared between the reference structure (solution.pdb) and a single RNA 3D model (model.pdb) is presented below. This alignment considers two incontinuous 3D substructures. Moreover, there is also incompatibility of chain id between compared 3D structures.

Each substructure is described by id of its first residue [i.e., chain id + '' + residue serial number + '' + insertion code (if needed)] and length. To integrate many 3D substructures within a single alignment prepared for the particular RNA 3D structure(s) their descriptions are separated by '|'. Alignments prepared for the reference structure as well as the analyzed RNA 3D models combined into a single string are separated by ';'. Alignment prepared for the reference structure should be always included at the beginning of this string.



In [3]:

    
%%bash
CURRENT_DIR="rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/STRUCTURE-3D/RENUMERATED-3D/"
docker exec -i rnapuzzles_toolkit_1 /home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh \
--command RENUMERATED-3D \
--multiple-models-directory-path "${CURRENT_DIR}/models" \
--alignment "14_ChenPostExp_1_rpr.pdb:U_1,31|U_33,29" \
--output-file-path "/home/demo/14_ChenPostExp_1_rpr.zip"









    



14 Oct 2018 17:37:48 : Command: RENUMERATED-3D
Alignment: 14_ChenPostExp_1_rpr.pdb:U_1_ ,31|U_33_ ,29
Base pairs identification tool: MC-ANNOTATE
Consider atoms supported by RNA-Puzzles only: N
Output file path: /home/demo/14_ChenPostExp_1_rpr.zip
Multiple PDB models directory path: rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/STRUCTURE-3D/RENUMERATED-3D//models
14 Oct 2018 17:37:49 : Command processed properly

rna-pdb-tools



In [ ]:

    
@inprogress

	Unnamed: 0	A.G1	A.G2	A.G3	A.U4	A.C5	A.A6	A.G7	A.G8	A.C9	...	A.C62	A.G63	A.A64	A.A65	A.A66	A.G67	A.U68	A.G69	A.G70	A.G71
0	NaN	.	.	{	.	(	(	(	(	(	...	(	.	.	.	.	)	)	)	)	)
1	18_Rhiju_1_rpr.pdb	47.82113725714628	94.33688052900935	31.39543325853087	18.076285941192126	12.151729157844537	8.501493434485806	8.598837669174692	42.59522171589118	20.87230553973775	...	5.670652702133741	10.452237697487197	10.899236647164763	20.403828903308945	33.634631786172775	40.07917778882871	2.8105157237558216	3.12268503958415	3.38706035222638	5.593981802971269
2	18_Rhiju_2_rpr.pdb	47.814634236400245	94.36427399419372	31.370981142647096	18.089484899307735	12.147480701577837	8.650831027408056	8.63040256878347	42.56551078528806	20.851341100772423	...	5.677884657321962	10.452737639486374	10.890558433434231	20.403397216819638	33.620206985666044	40.080335425773846	2.817183422703433	3.126361110146106	3.3941886507573806	5.589145358542448
3	18_Rhiju_3_rpr.pdb	47.81641584104296	94.33716713819841	31.381629086411603	18.09762029066829	12.488499179888773	7.761124375253679	8.446983647092896	42.5977909315378	20.874485775991467	...	5.693305680051093	10.461824200944507	10.897447127771594	20.391915532271337	33.63770508870173	40.08087253048741	2.804217011141902	3.1233132994034736	3.0792003889587933	4.145803549416084
4	18_Rhiju_4_rpr.pdb	47.8123326942324	94.3496976110549	31.38130853181125	18.09916815587993	12.154305729867458	8.057636320487854	8.431397105940157	42.586465159687144	20.87291438951094	...	5.670652702133741	10.452237697487197	10.899236647164763	20.403828903308945	33.634631786172775	40.07917778882871	2.8105157237558216	3.12268503958415	3.38706035222638	5.593981802971269
5	18_Rhiju_5_rpr.pdb	30.244337414416037	49.040831717487315	38.528085614747795	24.496042938467244	28.577976739037847	17.340345024766222	8.424235985713661	42.56798606775238	20.870294429470505	...	15.110266980984632	30.30188070737533	12.54596966689406	15.542425288298308	13.00847949411069	43.67364096067794	3.053039560544712	6.671760440953921	46.73693054868335	27.501814192468878

Table of Contents