BasicAssessMetrics


In [57]:
%%bash
curr_path = `pwd`
cd opt
git clone https://github.com/RNA-Puzzles/BasicAssessMetrics.git


Cloning into 'BasicAssessMetrics'...

In [60]:



Out[60]:
u'/Users/magnus/work-src/rna-pdb-tools/opt'

In [61]:
import sys
import os

import pdb_utils
import utils
import extract

from operator import attrgetter

RESIDUES_LIST = "data/residues.list"
ATOMS_LIST = "data/atoms.list"

def CleanFormat(f):
    """
    CleanFormat is a function used to format different platform formats to unix. Users need to install dos2unix
    """
    os.system( "mac2unix -q %s" %f )
    os.system( "dos2unix -q %s" %f )

In [62]:
def normalize_structure(struct, out_file = None, index_file=None, extract_file = None):
    pdb_normalizer = pdb_utils.PDBNormalizer( RESIDUES_LIST, ATOMS_LIST )
    ok = pdb_normalizer.parse( struct, out_file )
    if not ok:
        sys.stderr.write("ERROR: structure not normalized!\n")
    else:
        sys.stderr.write("INFO: Normalization succeded!\n")
    if not extract_file is None:
        coords=open(index_file).read()
        extract.extract_PDB(SOLUTION_NORMAL,coords, extract_file)
        sys.stderr.write("INFO:	structure extracted\n")

In [63]:
# PVALUE set according to Hajdin et al., RNA (7) 16, 2010, either "+" or "-"
def calc_RMSD(native_file, native_index, prediction_file, prediction_index, PVALUE = "-"):
    res_struct = pdb_utils.PDBStruct()
    res_struct.load( native_file, native_index )
    res_raw_seq = res_struct.raw_sequence()

    sol_struct = pdb_utils.PDBStruct()
    sol_struct.load( prediction_file, prediction_index )
    sol_raw_seq = sol_struct.raw_sequence()

    if( sol_raw_seq != res_raw_seq ):
        sys.stderr.write("ERROR Result sequence != Solution sequence!\n")
        sys.stderr.write("DATA Solution sequence --> '%s'\n" %sol_raw_seq )
        sys.stderr.write("DATA Result sequence   --> '%s'\n" %res_raw_seq )
        return(-1)
    # computes the RMSD
    comparer = pdb_utils.PDBComparer()
    rmsd = comparer.rmsd( sol_struct, res_struct )
    sys.stderr.write("INFO Partial RMSD --> %f\n" %rmsd )
    pvalue = comparer.pvalue( rmsd, len(sol_raw_seq), PVALUE )
    sys.stderr.write("INFO Partial P-Value --> %e\n" %pvalue )
    return(rmsd, pvalue)

In [10]:
def InteractionNetworkFidelity(native_file, native_index, prediction_file, prediction_index):
    res_struct = pdb_utils.PDBStruct()
    res_struct.load( native_file, native_index )
    res_raw_seq = res_struct.raw_sequence()

    sol_struct = pdb_utils.PDBStruct()
    sol_struct.load( prediction_file, prediction_index )
    sol_raw_seq = sol_struct.raw_sequence()

    if( sol_raw_seq != res_raw_seq ):
        sys.stderr.write("ERROR Result sequence != Solution sequence!\n")
        sys.stderr.write("DATA Solution sequence --> '%s'\n" %sol_raw_seq )
        sys.stderr.write("DATA Result sequence	 --> '%s'\n" %res_raw_seq )
        return(-1)
    # computes the RMSD
    comparer = pdb_utils.PDBComparer()
    rmsd = comparer.rmsd( sol_struct, res_struct )
    INF_ALL = comparer.INF( sol_struct, res_struct, type="ALL" )
    DI_ALL = rmsd / INF_ALL
    INF_WC = comparer.INF( sol_struct, res_struct, type="PAIR_2D" )
    INF_NWC = comparer.INF( sol_struct, res_struct, type="PAIR_3D" )
    INF_STACK = comparer.INF( sol_struct, res_struct, type="STACK" )
    return (rmsd,DI_ALL, INF_ALL, INF_WC, INF_NWC,INF_STACK)

In [41]:
# Normalize PDB format, correct residue names and atom names. 
normalize_structure('example/14_solution_0.pdb','example/14_solution_normalized.pdb')

# calculate RMSD for RNA structures
# require biopython
rmsd, pvalue = calc_RMSD("example/14_solution_0.pdb",
        "example/14_solution_0.index",
        "example/14_ChenPostExp_2.pdb",
        "example/14_ChenPostExp_2.index")

print '14_ChenPostExp_2'
print '  RMSD:', rmsd
print '  pvalue:', pvalue

# calculate InteractionNetworkFidelity and Deformation Index for RNA structures
# need to have MA-annotate in the directory or set in mcannotate.py
rmsd, DI_ALL, INF_ALL, INF_WC, INF_NWC,INF_STACK = InteractionNetworkFidelity("example/14_solution_0.pdb",
          "example/14_solution_0.index",
          "example/14_ChenPostExp_2.pdb",
          "example/14_ChenPostExp_2.index")

print '14_ChenPostExp_2, rmsd', rmsd
print "  DI_ALL:", DI_ALL
print "  INF_ALL:", INF_ALL


INFO: Normalization succeded!
WARNING    >> Atom P from residue (' ', 1, ' ') not found in target atom list
WARNING    >> Atom OP1 from residue (' ', 1, ' ') not found in target atom list
WARNING    >> Atom OP2 from residue (' ', 1, ' ') not found in target atom list
INFO Partial RMSD --> 7.751173
INFO Partial P-Value --> 7.327472e-15
WARNING    >> Atom P from residue (' ', 1, ' ') not found in target atom list
WARNING    >> Atom OP1 from residue (' ', 1, ' ') not found in target atom list
WARNING    >> Atom OP2 from residue (' ', 1, ' ') not found in target atom list
14_ChenPostExp_2
  RMSD: 7.751173243045827
  pvalue: 7.327471962526033e-15
14_ChenPostExp_2, rmsd 7.751173243045827
  DI_ALL: 10.643784178530254
  INF_ALL: 0.72823472489

Deformation Profile

Read DeformationProfile Manual.pdf for more details.


In [91]:
%%bash
git clone https://github.com/RNA-Puzzles/DeformationProfile.git


Cloning into 'DeformationProfile'...

Comparing two similar models


In [92]:
cd DeformationProfile/examples/ex1
cd /Users/magnus/work-src/rna-pdb-tools/opt/DeformationProfile


  File "<ipython-input-92-aaa236dd1b63>", line 1
    cd DeformationProfile/examples/ex1
                        ^
SyntaxError: invalid syntax

In [93]:
%%bash
python dp.py examples/ex1/a.pdb examples/ex1/b.pdb


- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
reference:    a.pdb
comparing:    b.pdb

ref id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 
cmp id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 

ref chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
cmp chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
ref residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
cmp residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
align. index:   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
opening reference file: 'examples/ex1/a.pdb'
opening comparing file: 'examples/ex1/b.pdb'
comparing models...
step: 0 of 19step: 1 of 19step: 2 of 19step: 3 of 19step: 4 of 19step: 5 of 19step: 6 of 19step: 7 of 19step: 8 of 19step: 9 of 19step: 10 of 19step: 11 of 19step: 12 of 19step: 13 of 19step: 14 of 19step: 15 of 19step: 16 of 19step: 17 of 19step: 18 of 19step: 19 of 19done
saving data file...
saving svg file...


In [94]:
%%bash
head -n 15 examples/ex1/b.dat


#DP 1.0
REF_PDB	examples/ex1/a.pdb
REF_MODEL	0
REF_MODEL_SEQUENCE	GGACUAGCGGAGGCUAGUCC
REF_MODEL_RESIDUES	(B:201:'G  ')(B:202:'G  ')(B:203:'A  ')(B:204:'C  ')(B:205:'U  ')(B:206:'A  ')(B:207:'G  ')(B:208:'C  ')(B:209:'G  ')(B:210:'G  ')(B:211:'A  ')(B:212:'G  ')(B:213:'G  ')(B:214:'C  ')(B:215:'U  ')(B:216:'A  ')(B:217:'G  ')(B:218:'U  ')(B:219:'C  ')(B:220:'C  ')
CMP_PDB	examples/ex1/b.pdb
CMP_MODEL	0
CMP_MODEL_SEQUENCE	GGACUAGCGGAGGCUAGUCC
CMP_MODEL_RESIDUES	(B:201:'G  ')(B:202:'G  ')(B:203:'A  ')(B:204:'C  ')(B:205:'U  ')(B:206:'A  ')(B:207:'G  ')(B:208:'C  ')(B:209:'G  ')(B:210:'G  ')(B:211:'A  ')(B:212:'G  ')(B:213:'G  ')(B:214:'C  ')(B:215:'U  ')(B:216:'A  ')(B:217:'G  ')(B:218:'U  ')(B:219:'C  ')(B:220:'C  ')
LOCAL_RMSD	0.455	0.170	0.093	0.386	0.876	0.140	0.162	0.361	0.782	1.005	0.132	0.488	0.977	0.388	0.338	0.484	0.115	0.164	0.106	0.072
ROW_MEANS	3.090	2.476	2.573	2.873	2.802	1.524	1.490	1.833	4.401	3.858	2.353	2.725	6.356	2.263	1.805	2.301	2.531	2.065	2.437	3.917
COL_MEANS	3.518	3.125	2.749	2.337	1.941	1.827	1.979	2.699	3.295	4.629	4.076	3.247	3.234	2.218	1.947	1.768	1.826	2.388	3.021	3.851
ROW_0	0.275	0.775	1.311	1.894	2.631	2.695	2.894	4.142	5.199	6.791	6.582	5.425	5.092	3.335	2.708	2.299	2.138	2.176	1.817	1.619
ROW_1	0.598	0.146	0.228	0.514	0.817	1.478	2.389	4.116	5.210	7.493	6.986	5.288	5.262	3.322	2.458	1.217	0.537	0.260	0.297	0.897
ROW_2	0.784	0.280	0.079	0.410	0.925	1.524	2.457	4.141	5.234	7.380	7.018	5.348	5.437	3.579	2.793	1.595	0.786	0.505	0.314	0.882

Comparing many similar models in single run


In [103]:
%%bash -s "/Users/magnus/work-src/rna-pdb-tools/opt/DeformationProfile/examples/ex2"
cd $1 
mkdir out
python ../../dp.py -c ex2a.cfg


- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
reference:    a.pdb
comparing:    b1.pdb

ref id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 
cmp id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 

ref chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
cmp chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
ref residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
cmp residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
align. index:   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
reference:    a.pdb
comparing:    b2.pdb

ref id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 
cmp id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 

ref chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
cmp chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
ref residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
cmp residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
align. index:   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
reference:    a.pdb
comparing:    b3.pdb

ref id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 
cmp id:       201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 

ref chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
cmp chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
ref residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
cmp residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
align. index:   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
mkdir: out: File exists
opening reference file: './ref/a.pdb'
opening comparing file: './cmp/b1.pdb'
comparing models...
step: 0 of 19step: 1 of 19step: 2 of 19step: 3 of 19step: 4 of 19step: 5 of 19step: 6 of 19step: 7 of 19step: 8 of 19step: 9 of 19step: 10 of 19step: 11 of 19step: 12 of 19step: 13 of 19step: 14 of 19step: 15 of 19step: 16 of 19step: 17 of 19step: 18 of 19step: 19 of 19done
saving data file...
saving svg file...
opening comparing file: './cmp/b2.pdb'
comparing models...
step: 0 of 19step: 1 of 19step: 2 of 19step: 3 of 19step: 4 of 19step: 5 of 19step: 6 of 19step: 7 of 19step: 8 of 19step: 9 of 19step: 10 of 19step: 11 of 19step: 12 of 19step: 13 of 19step: 14 of 19step: 15 of 19step: 16 of 19step: 17 of 19step: 18 of 19step: 19 of 19done
saving data file...
saving svg file...
opening comparing file: './cmp/b3.pdb'
comparing models...
step: 0 of 19step: 1 of 19step: 2 of 19step: 3 of 19step: 4 of 19step: 5 of 19step: 6 of 19step: 7 of 19step: 8 of 19step: 9 of 19step: 10 of 19step: 11 of 19step: 12 of 19step: 13 of 19step: 14 of 19step: 15 of 19step: 16 of 19step: 17 of 19step: 18 of 19step: 19 of 19done
saving data file...
saving svg file...

In [105]:
%%bash -s "/Users/magnus/work-src/rna-pdb-tools/opt/DeformationProfile/examples/ex3"
cd $1 
mkdir out
python ../../dp.py -c ex3.cfg


- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
reference:    a.pdb
comparing:    b.pdb

ref id:       201 202 203 204 205 206 207 208 209   0   1   2   3   4   5   6   7   8   9  10 
cmp id:         1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 

ref chain:      B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B   B 
cmp chain:      A   A   A   A   A   A   A   A   A   A   A   A   A   A   A   A   A   A   A   A 
ref residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
cmp residues:   G   G   A   C   U   A   G   C   G   G   A   G   G   C   U   A   G   U   C   C 
align. index:   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
mkdir: out: File exists
opening reference file: 'a.pdb'
opening comparing file: 'b.pdb'
comparing models...
step: 0 of 19step: 1 of 19step: 2 of 19step: 3 of 19step: 4 of 19step: 5 of 19step: 6 of 19step: 7 of 19step: 8 of 19step: 9 of 19step: 10 of 19step: 11 of 19step: 12 of 19step: 13 of 19step: 14 of 19step: 15 of 19step: 16 of 19step: 17 of 19step: 18 of 19step: 19 of 19done
saving data file...
saving svg file...

mcq-cli

MCQ (Mean of Circular Quantities) has been first presented in the paper “MCQ4Structures to compute similarity of molecule structures” by Zok et al. (Central European Journal of Operations Research, 2014;22(3):457-474. doi: 10.1007/s10100-013-0296-5).

LCS-TA (Longest Continuous Segments in Torsion Angle space) applies a measure first described in the paper “LCS-TA to identify similar fragments in RNA 3D structures” by Wiedemann et al. (BMC Bioinformatics, 2017;18(1):456. doi: 10.1186/s12859-017-1867-6). A full implementation of both methods can be found on github, maintained by Zok and Wiedemann.

Most important features of both methods / measures:

  • MCQ

    • The method applies to a pair of 3D structures and is size independent.
    • It can be used for a set of structures in all-against-all or all-against-target mode.
    • It translates typical algebraic representation of a 3D structure into the trigonometric one (a set of torsion angles).
    • It computes the distance between structures in torsion angle space.
    • The distance is measured as mean of local distances between the corresponding angles, and provided in degrees.
    • The measure is sequence independent.
  • LCS-TA

    • The method applies to a pair of 3D structures and is size independent.
    • It uses MCQ-based measure for structure comparison.
    • Within the compared structures, it finds the longest continuous segments which display similarity in torsion angle space.
    • Two segments are considered similar if their MCQ is below predefined threshold.
    • The method provides segment length and its position in the structure.
    • The length of the longest continuous segment is a measure of similarity of two structures.
    • The method can be run in sequence dependent or sequence independent mode.

MCQ is maintained by Zok, while LCS-TA is maintained by Wiedemann

Copy some demo files to your Docker image.


In [25]:
%%bash 

docker cp rna_pdb_tools/input/rp18 306468777bc5:/home/demo
echo 'See if the file are at the Docker image:'
#docker exec -i 306468777bc5 /bin/ls -l .
echo 'Run mcq'
echo 'done'


See if the file are at the Docker image:
Run mcq
total 36
drwxr-xr-x 2 demo demo 4096 Oct 28 20:43 hsperfdata_demo
drwxr-xr-x 7 demo demo 4096 Oct 28 20:43 99228ed0-d1af-4b60-9903-da360aa8ce1c
drwxr-xr-x 7 demo demo 4096 Oct 28 20:42 14001637-0e41-444a-af71-255c38fbf006
drwxr-xr-x 7 demo demo 4096 Oct 28 20:41 42d358d6-f908-488f-b288-4dd52c1dce59
drwxr-xr-x 7 demo demo 4096 Oct 28 20:40 87997101-cc0f-4a7e-8b6e-bc5b81702a3a
drwxr-xr-x 7 demo demo 4096 Oct 28 20:40 04390fe6-a085-4240-a4e3-d90e7ab9053c
drwxr-xr-x 7 demo demo 4096 Oct 28 20:37 0010b609-890c-48ff-a925-a9fd7b7d3cd0
drwxr-xr-x 7 demo demo 4096 Oct 28 20:33 a1c796dc-2003-4ba9-91fa-dd7b909bdf48
drwxr-xr-x 7 demo demo 4096 Oct 28 20:21 50e0fa17-d097-4a06-9694-f19146192e3e

Local


In [24]:
%%bash
docker exec -i 306468777bc5 sh -c "/home/demo/rna-puzzles-toolkit/mcq-cli/local -m /home/demo/rp18/18_Rh* -t /home/demo/rp18/18_0_solution_5TPY_rpr.pdb"


Results are available in: /tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c

In [26]:
%%bash
docker exec -i 306468777bc5 sh -c "ls -lt /tmp"


total 36
drwxr-xr-x 2 demo demo 4096 Oct 28 20:43 hsperfdata_demo
drwxr-xr-x 7 demo demo 4096 Oct 28 20:43 99228ed0-d1af-4b60-9903-da360aa8ce1c
drwxr-xr-x 7 demo demo 4096 Oct 28 20:42 14001637-0e41-444a-af71-255c38fbf006
drwxr-xr-x 7 demo demo 4096 Oct 28 20:41 42d358d6-f908-488f-b288-4dd52c1dce59
drwxr-xr-x 7 demo demo 4096 Oct 28 20:40 87997101-cc0f-4a7e-8b6e-bc5b81702a3a
drwxr-xr-x 7 demo demo 4096 Oct 28 20:40 04390fe6-a085-4240-a4e3-d90e7ab9053c
drwxr-xr-x 7 demo demo 4096 Oct 28 20:37 0010b609-890c-48ff-a925-a9fd7b7d3cd0
drwxr-xr-x 7 demo demo 4096 Oct 28 20:33 a1c796dc-2003-4ba9-91fa-dd7b909bdf48
drwxr-xr-x 7 demo demo 4096 Oct 28 20:21 50e0fa17-d097-4a06-9694-f19146192e3e

Generate a plot based on the table.


In [48]:
%%bash
docker exec -i 306468777bc5 sh -c 'cd /tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c/ && ~/rna-puzzles-toolkit/mcq-cli/colorbars.R table.csv'


null device 
          1 

In [49]:
%%bash
docker exec -i 306468777bc5 sh -c "ls /tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c/"


18_Rhiju_1_rpr.pdb
18_Rhiju_2_rpr.pdb
18_Rhiju_3_rpr.pdb
18_Rhiju_4_rpr.pdb
18_Rhiju_5_rpr.pdb
colorbars.pdf
table.csv

Copy the results to your local temp.


In [50]:
%%bash
docker cp 306468777bc5:/tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c /tmp/
ls /tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c


18_Rhiju_1_rpr.pdb
18_Rhiju_2_rpr.pdb
18_Rhiju_3_rpr.pdb
18_Rhiju_4_rpr.pdb
18_Rhiju_5_rpr.pdb
colorbars.pdf
table.csv

View the results using Pandas.


In [51]:
import pandas as pd
table = pd.read_csv('/tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c/table.csv')
table


Out[51]:
Unnamed: 0 A.G1 A.G2 A.G3 A.U4 A.C5 A.A6 A.G7 A.G8 A.C9 ... A.C62 A.G63 A.A64 A.A65 A.A66 A.G67 A.U68 A.G69 A.G70 A.G71
0 NaN . . { . ( ( ( ( ( ... ( . . . . ) ) ) ) )
1 18_Rhiju_1_rpr.pdb 47.82113725714628 94.33688052900935 31.39543325853087 18.076285941192126 12.151729157844537 8.501493434485806 8.598837669174692 42.59522171589118 20.87230553973775 ... 5.670652702133741 10.452237697487197 10.899236647164763 20.403828903308945 33.634631786172775 40.07917778882871 2.8105157237558216 3.12268503958415 3.38706035222638 5.593981802971269
2 18_Rhiju_2_rpr.pdb 47.814634236400245 94.36427399419372 31.370981142647096 18.089484899307735 12.147480701577837 8.650831027408056 8.63040256878347 42.56551078528806 20.851341100772423 ... 5.677884657321962 10.452737639486374 10.890558433434231 20.403397216819638 33.620206985666044 40.080335425773846 2.817183422703433 3.126361110146106 3.3941886507573806 5.589145358542448
3 18_Rhiju_3_rpr.pdb 47.81641584104296 94.33716713819841 31.381629086411603 18.09762029066829 12.488499179888773 7.761124375253679 8.446983647092896 42.5977909315378 20.874485775991467 ... 5.693305680051093 10.461824200944507 10.897447127771594 20.391915532271337 33.63770508870173 40.08087253048741 2.804217011141902 3.1233132994034736 3.0792003889587933 4.145803549416084
4 18_Rhiju_4_rpr.pdb 47.8123326942324 94.3496976110549 31.38130853181125 18.09916815587993 12.154305729867458 8.057636320487854 8.431397105940157 42.586465159687144 20.87291438951094 ... 5.670652702133741 10.452237697487197 10.899236647164763 20.403828903308945 33.634631786172775 40.07917778882871 2.8105157237558216 3.12268503958415 3.38706035222638 5.593981802971269
5 18_Rhiju_5_rpr.pdb 30.244337414416037 49.040831717487315 38.528085614747795 24.496042938467244 28.577976739037847 17.340345024766222 8.424235985713661 42.56798606775238 20.870294429470505 ... 15.110266980984632 30.30188070737533 12.54596966689406 15.542425288298308 13.00847949411069 43.67364096067794 3.053039560544712 6.671760440953921 46.73693054868335 27.501814192468878

6 rows × 72 columns


In [52]:
%%bash
cp /tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c/18_Rhiju_1_rpr.pdb/delta.svg detla.svg

rnaqua: A tool supporting quality assessment of RNA 3D structures

RNAQUA (RNA QUality Assessment) is a web service based wrapper of basic RNA comparison metrics. It is a RESTful web service client developed in Java. The tool provides a set of web services initially designed for RNAssess (Lukasiak et al., Nucleic Acids Research, 2015;43(W1):W502-W506. doi:10.1093/nar/gkv557) to support the quality assessment of RNA 3D structures. RNAQUA requires a stable release of JRE 7 (or later) installed on a user workstation with an internet access. Full implementation of the tool is stored on github and maintained by Antczak. The binaries and use cases can be downloaded from here. Most important features of RNAQUA:

  • At the input, a user provides RNA 3D structure(s) in PDB format.
  • Output data are returned in XML format.
  • RNAQUA allows to compute the following measures:
    • ClashScore,
    • Root-mean-square deviation (RMSD),
    • Interaction network fidelity (INF),
    • Deformation index (DI),
    • P-value (either for entire RNA 3D structure(s) or for a set of discontinuous 3D substructures).
  • Two processing modes are available:
    • An analysis of single RNA 3D structure (PDB validation, ClashScore, sequence-based analysis, structure unification),
    • An analysis of RNA 3D model(s) with respect to the reference structure (RMSD, INF, DI, P-value, Deformation profile, sequence-based differences between structures, multiple models over the reference structure superposition).
  • Optionally, the user can define alignment between the reference structure and all corresponding RNA 3D model(s) which is helpful if there are differences in sequence, distribution of chains or residue numbering.

This metric tool is maintained by Antczak.

DEMO

Run rnaqua using Docker.


In [6]:
%%bash 
docker exec -i rnapuzzles_toolkit_1 /home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh -h


usage: rnaqua
 -c,--command <arg>   supported commands: PDB-VALIDATION, DEFORMATION-PROFILE, CLASH-SCORE, ROOT-MEAN-SQUARE-DEVIATION, ALL-INTERACTION-NETWORK-FIDELITY-SCORES-AT-ONCE,
                      INTERACTION-NETWORK-FIDELITY-WATSON-CRICK, INTERACTION-NETWORK-FIDELITY-NON-WATSON-CRICK, INTERACTION-NETWORK-FIDELITY-STACKING, INTERACTION-NETWORK-FIDELITY-ALL, P-VALUE,
                      DEFORMATION-INDEX, ALL-SCORES-AT-ONCE, SEQUENCE, FRAGMENT, ORIGINAL-3D, RENUMERATED-3D

PDB-VALIDATION


In [20]:
%%bash 
docker exec -i rnapuzzles_toolkit_1 /home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh \
--command PDB-VALIDATION \
--multiple-models-directory-path rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/PDB-VALIDATION/models/ \
--output-file-path /tmp/validation.xml
docker exec -i rnapuzzles_toolkit_1 cat /tmp/validation.xml


13 Oct 2018 17:54:20 : Command: PDB-VALIDATION
Base pairs identification tool: MC-ANNOTATE
Consider atoms supported by RNA-Puzzles only: N
Output file path: /tmp/validation.xml
Multiple PDB models directory path: rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/PDB-VALIDATION/models/
13 Oct 2018 17:54:21 : Command processed properly

In [21]:
%%bash
docker exec -i rnapuzzles_toolkit_1 cat /tmp/validation.xml


<?xml version="1.0" encoding="UTF-8" standalone="yes"?><descriptions><basics><filename>model.pdb</filename><errors><error>No. 5: Omitted record. Atom serial number is not integer (   A5). Residue sequence number is not integer ( 2A1). Unrecognized residue name (Z). X coordinate is not floating-point number (   2.A25). Y coordinate is not floating-point number ( -2A.462). Z coordinate is not floating-point number (  -3.A25).</error></errors></basics></descriptions>

In [25]:
%%bash
docker exec -i rnapuzzles_toolkit_1 /home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh \
--command PDB-VALIDATION \
--single-model-file-path rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/PDB-VALIDATION/models/model.pdb \
--output-file-path /tmp/validation.xml


13 Oct 2018 17:59:12 : Command: PDB-VALIDATION
Base pairs identification tool: MC-ANNOTATE
Consider atoms supported by RNA-Puzzles only: N
Output file path: /tmp/validation.xml
Single model PDB file path: rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/PDB-VALIDATION/models/model.pdb
13 Oct 2018 17:59:12 : Command processed properly

Clash Score

14_ChenPostExp_1_rpr.pdb:U_1,31|U_33,29

In [6]:
%%bash
CURRENT_DIR="rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/SCORES/CLASH-SCORE"
docker exec -i rnapuzzles_toolkit_1 /home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh --command CLASH-SCORE --single-model-file-path \
"${CURRENT_DIR}/incontinuous-models/14_ChenPostExp_1_rpr.pdb" \
--alignment "14_ChenPostExp_1_rpr.pdb:U_1,31|U_33,29" --output-file-path /tmp/14_ChenPostExp_1_rpr.xml


14 Oct 2018 17:38:19 : Command: CLASH-SCORE
Alignment: 14_ChenPostExp_1_rpr.pdb:U_1_ ,31|U_33_ ,29
Base pairs identification tool: MC-ANNOTATE
Consider atoms supported by RNA-Puzzles only: N
Output file path: /tmp/14_ChenPostExp_1_rpr.xml
Single model PDB file path: rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/SCORES/CLASH-SCORE/incontinuous-models/14_ChenPostExp_1_rpr.pdb
14 Oct 2018 17:38:30 : Command processed properly

In [7]:
%%bash
CURRENT_DIR="rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/SCORES/CLASH-SCORE"
docker exec -i rnapuzzles_toolkit_1 cat /tmp/14_ChenPostExp_1_rpr.xml
#
#<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
#<measureScores><structure><description><filename>14_ChenPostExp_1_rpr.pdb</filename><errors/></description><score>2.06</score></structure></measureScores>
#


<?xml version="1.0" encoding="UTF-8" standalone="yes"?><measureScores><structure><description><filename>14_ChenPostExp_1_rpr.pdb</filename><errors/></description><score>2.06</score></structure></measureScores>

Structure 3D

ORIGINAL-3D and RENUMERATED-3D

An extraction (ORIGINAL-3D) and unification (RENUMERATED-3D) of RNA 3D model(s) or a set of incontinuous 3D substructures specified by the user which are additionally superimposed over the corresponding 3D structure/substructures of the reference. At the output, ZIP archive including the coordinates of the reference structure as well as all considered RNA 3D model(s) is returned.

Alignment

To ensure robustness of quality assessment process a user can specify the appropriate alignment (-a,--alignment) between the reference 3D structure and all analyzed RNA 3D model(s) which often differ slightly in sequence, distribution of chains or numbering of residues.

An example of alignment prepared between the reference structure (solution.pdb) and a single RNA 3D model (model.pdb) is presented below. This alignment considers two incontinuous 3D substructures. Moreover, there is also incompatibility of chain id between compared 3D structures.

Each substructure is described by id of its first residue [i.e., chain id + '' + residue serial number + '' + insertion code (if needed)] and length. To integrate many 3D substructures within a single alignment prepared for the particular RNA 3D structure(s) their descriptions are separated by '|'. Alignments prepared for the reference structure as well as the analyzed RNA 3D models combined into a single string are separated by ';'. Alignment prepared for the reference structure should be always included at the beginning of this string.


In [3]:
%%bash
CURRENT_DIR="rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/STRUCTURE-3D/RENUMERATED-3D/"
docker exec -i rnapuzzles_toolkit_1 /home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh \
--command RENUMERATED-3D \
--multiple-models-directory-path "${CURRENT_DIR}/models" \
--alignment "14_ChenPostExp_1_rpr.pdb:U_1,31|U_33,29" \
--output-file-path "/home/demo/14_ChenPostExp_1_rpr.zip"


14 Oct 2018 17:37:48 : Command: RENUMERATED-3D
Alignment: 14_ChenPostExp_1_rpr.pdb:U_1_ ,31|U_33_ ,29
Base pairs identification tool: MC-ANNOTATE
Consider atoms supported by RNA-Puzzles only: N
Output file path: /home/demo/14_ChenPostExp_1_rpr.zip
Multiple PDB models directory path: rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/STRUCTURE-3D/RENUMERATED-3D//models
14 Oct 2018 17:37:49 : Command processed properly

rna-pdb-tools


In [ ]:
@inprogress