In [57]:
%%bash
# Clone the RNA-Puzzles BasicAssessMetrics repository into ./opt.
# Bash assignments take no spaces around '=' (the original line tried to run a
# command named "curr_path"); $(...) replaces the legacy backticks.
curr_path=$(pwd)
# Abort if ./opt does not exist, so the clone never lands in the wrong directory.
cd opt || exit 1
git clone https://github.com/RNA-Puzzles/BasicAssessMetrics.git
In [60]:
Out[60]:
In [61]:
import sys
import os
import pdb_utils
import utils
import extract
from operator import attrgetter
RESIDUES_LIST = "data/residues.list"
ATOMS_LIST = "data/atoms.list"
def CleanFormat(f):
    """
    Convert the line endings of file `f` to unix format, in place.

    Runs the external `mac2unix` and then `dos2unix` commands in quiet mode
    (-q). Users need to install dos2unix.

    f -- path of the file to convert.

    The conversion is best effort: the exit status of each tool is ignored,
    and a tool that cannot be started is only reported on stderr.
    Returns None.
    """
    # Local import keeps this cell self-contained.
    import subprocess

    for tool in ("mac2unix", "dos2unix"):
        try:
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact and prevents command injection.
            subprocess.call([tool, "-q", "%s" % f])
        except OSError:
            # Raised when the executable is missing or cannot be run.
            sys.stderr.write("WARNING: could not run '%s' on '%s'\n" % (tool, f))
In [62]:
def normalize_structure(struct, out_file = None, index_file=None, extract_file = None):
    """
    Normalize a PDB structure and optionally extract part of it.

    struct       -- input handed to PDBNormalizer.parse.
    out_file     -- output handed to PDBNormalizer.parse; also used as the
                    source structure for the optional extraction step.
    index_file   -- file whose whole text is passed to extract.extract_PDB as
                    the coordinates selection; required when extract_file is set.
    extract_file -- destination passed to extract.extract_PDB; None (default)
                    skips the extraction step.

    Progress and errors are reported on stderr. Returns None.
    Raises ValueError when extraction is requested without out_file or
    index_file.
    """
    pdb_normalizer = pdb_utils.PDBNormalizer( RESIDUES_LIST, ATOMS_LIST )
    ok = pdb_normalizer.parse( struct, out_file )
    if not ok:
        sys.stderr.write("ERROR: structure not normalized!\n")
    else:
        sys.stderr.write("INFO: Normalization succeded!\n")

    if extract_file is not None:
        if out_file is None or index_file is None:
            raise ValueError("extract_file requires both out_file and index_file")
        # Close the index file deterministically instead of leaking the handle.
        with open(index_file) as index_handle:
            coords = index_handle.read()
        # The original passed the undefined global SOLUTION_NORMAL here, which
        # raised NameError; the normalized output file is the structure to
        # extract from.
        # NOTE(review): confirm out_file is the intended source.
        extract.extract_PDB(out_file, coords, extract_file)
        sys.stderr.write("INFO: structure extracted\n")
In [63]:
# PVALUE set according to Hajdin et al., RNA (7) 16, 2010, either "+" or "-"
def calc_RMSD(native_file, native_index, prediction_file, prediction_index, PVALUE = "-"):
    """
    Compute the RMSD between a native and a predicted structure, with its P-value.

    native_file, native_index         -- PDB file and index file of the native structure.
    prediction_file, prediction_index -- PDB file and index file of the prediction.
    PVALUE                            -- either "+" or "-", set according to
                                         Hajdin et al., RNA (7) 16, 2010;
                                         forwarded to PDBComparer.pvalue.

    Returns the tuple (rmsd, pvalue). When the raw sequences of the two
    structures differ, the mismatch is reported on stderr and the plain
    integer -1 is returned instead, so callers that unpack the result must
    check for -1 first.
    """
    def _load(pdb_path, index_path):
        # Build a PDBStruct and return it together with its raw sequence.
        structure = pdb_utils.PDBStruct()
        structure.load( pdb_path, index_path )
        return structure, structure.raw_sequence()

    native, native_seq = _load( native_file, native_index )
    predicted, predicted_seq = _load( prediction_file, prediction_index )

    # The comparison is only meaningful for identical sequences.
    if predicted_seq != native_seq:
        sys.stderr.write("ERROR Result sequence != Solution sequence!\n")
        sys.stderr.write("DATA Solution sequence --> '%s'\n" % predicted_seq)
        sys.stderr.write("DATA Result sequence --> '%s'\n" % native_seq)
        return -1

    comparer = pdb_utils.PDBComparer()

    rmsd = comparer.rmsd( predicted, native )
    sys.stderr.write("INFO Partial RMSD --> %f\n" % rmsd)

    pvalue = comparer.pvalue( rmsd, len(predicted_seq), PVALUE )
    sys.stderr.write("INFO Partial P-Value --> %e\n" % pvalue)

    return rmsd, pvalue
In [10]:
def InteractionNetworkFidelity(native_file, native_index, prediction_file, prediction_index):
    """
    Compute RMSD, Deformation Index and Interaction Network Fidelity (INF) values.

    native_file, native_index         -- PDB file and index file of the native structure.
    prediction_file, prediction_index -- PDB file and index file of the prediction.

    Returns the tuple (rmsd, DI_ALL, INF_ALL, INF_WC, INF_NWC, INF_STACK), where
    the INF values come from PDBComparer.INF with type "ALL", "PAIR_2D",
    "PAIR_3D" and "STACK" respectively, and DI_ALL = rmsd / INF_ALL.
    DI_ALL is float("inf") when INF_ALL is 0.

    When the raw sequences of the two structures differ, the mismatch is
    reported on stderr and the plain integer -1 is returned instead, so
    callers that unpack the result must check for -1 first.
    """
    res_struct = pdb_utils.PDBStruct()
    res_struct.load( native_file, native_index )
    res_raw_seq = res_struct.raw_sequence()

    sol_struct = pdb_utils.PDBStruct()
    sol_struct.load( prediction_file, prediction_index )
    sol_raw_seq = sol_struct.raw_sequence()

    if( sol_raw_seq != res_raw_seq ):
        sys.stderr.write("ERROR Result sequence != Solution sequence!\n")
        sys.stderr.write("DATA Solution sequence --> '%s'\n" %sol_raw_seq )
        sys.stderr.write("DATA Result sequence --> '%s'\n" %res_raw_seq )
        return(-1)

    # computes the RMSD and the INF-based metrics
    comparer = pdb_utils.PDBComparer()
    rmsd = comparer.rmsd( sol_struct, res_struct )
    INF_ALL = comparer.INF( sol_struct, res_struct, type="ALL" )
    # Guard the division: an INF of 0 would otherwise raise ZeroDivisionError.
    # NOTE(review): confirm that an infinite Deformation Index is the desired
    # convention when INF_ALL is 0.
    if INF_ALL == 0:
        DI_ALL = float("inf")
    else:
        DI_ALL = rmsd / INF_ALL
    INF_WC = comparer.INF( sol_struct, res_struct, type="PAIR_2D" )
    INF_NWC = comparer.INF( sol_struct, res_struct, type="PAIR_3D" )
    INF_STACK = comparer.INF( sol_struct, res_struct, type="STACK" )
    return (rmsd,DI_ALL, INF_ALL, INF_WC, INF_NWC,INF_STACK)
In [41]:
# Normalize PDB format, correct residue names and atom names.
normalize_structure('example/14_solution_0.pdb','example/14_solution_normalized.pdb')

# calculate RMSD for RNA structures
# require biopython
rmsd, pvalue = calc_RMSD("example/14_solution_0.pdb",
                         "example/14_solution_0.index",
                         "example/14_ChenPostExp_2.pdb",
                         "example/14_ChenPostExp_2.index")
# print(...) with a single pre-formatted string produces the same output on
# Python 2 and Python 3 (the original print statements were Python 2 only).
print('14_ChenPostExp_2')
print(' RMSD: %s' % (rmsd,))
print(' pvalue: %s' % (pvalue,))

# calculate InteractionNetworkFidelity and Deformation Index for RNA structures
# need to have MC-Annotate in the directory or set in mcannotate.py
rmsd, DI_ALL, INF_ALL, INF_WC, INF_NWC,INF_STACK = InteractionNetworkFidelity("example/14_solution_0.pdb",
                                                                              "example/14_solution_0.index",
                                                                              "example/14_ChenPostExp_2.pdb",
                                                                              "example/14_ChenPostExp_2.index")
print('14_ChenPostExp_2, rmsd %s' % (rmsd,))
print(" DI_ALL: %s" % (DI_ALL,))
print(" INF_ALL: %s" % (INF_ALL,))
Read DeformationProfile Manual.pdf for more details.
In [91]:
%%bash
# Clone the RNA-Puzzles DeformationProfile repository into the current directory.
git clone https://github.com/RNA-Puzzles/DeformationProfile.git
In [92]:
cd DeformationProfile/examples/ex1
cd /Users/magnus/work-src/rna-pdb-tools/opt/DeformationProfile
In [93]:
%%bash
# Run dp.py on the two example structures a.pdb and b.pdb of example 1.
# Paths are relative, so this presumably must run from the DeformationProfile
# checkout root — confirm the working directory.
python dp.py examples/ex1/a.pdb examples/ex1/b.pdb
In [94]:
%%bash
# Show the first 15 lines of examples/ex1/b.dat
# (presumably written by the dp.py run in the previous cell — confirm).
head -n 15 examples/ex1/b.dat
In [103]:
%%bash -s "/Users/magnus/work-src/rna-pdb-tools/opt/DeformationProfile/examples/ex2"
# Run dp.py with the ex2a.cfg configuration from inside the example directory.
# $1 is the example directory passed through `-s` on the magic line above
# (a machine-specific absolute path; adjust it to your checkout).
# Quote it so paths with spaces work, and stop if the directory is missing.
cd "$1" || exit 1
# -p: do not fail when the cell is re-run and out/ already exists.
mkdir -p out
python ../../dp.py -c ex2a.cfg
In [105]:
%%bash -s "/Users/magnus/work-src/rna-pdb-tools/opt/DeformationProfile/examples/ex3"
# Run dp.py with the ex3.cfg configuration from inside the example directory.
# $1 is the example directory passed through `-s` on the magic line above
# (a machine-specific absolute path; adjust it to your checkout).
# Quote it so paths with spaces work, and stop if the directory is missing.
cd "$1" || exit 1
# -p: do not fail when the cell is re-run and out/ already exists.
mkdir -p out
python ../../dp.py -c ex3.cfg
MCQ (Mean of Circular Quantities) was first presented in the paper “MCQ4Structures to compute similarity of molecule structures” by Zok et al. (Central European Journal of Operations Research, 2014;22(3):457-474. doi: 10.1007/s10100-013-0296-5).
LCS-TA (Longest Continuous Segments in Torsion Angle space) applies a measure first described in the paper “LCS-TA to identify similar fragments in RNA 3D structures” by Wiedemann et al. (BMC Bioinformatics, 2017;18(1):456. doi: 10.1186/s12859-017-1867-6). A full implementation of both methods can be found on github, maintained by Zok and Wiedemann.
Most important features of both methods / measures:
MCQ
LCS-TA
MCQ is maintained by Zok, while LCS-TA is maintained by Wiedemann
Copy some demo files to your Docker image.
In [25]:
%%bash
# Copy the rp18 demo input directory into /home/demo of the Docker container.
# The container id is specific to one local Docker session; replace it with yours.
CONTAINER=306468777bc5
docker cp rna_pdb_tools/input/rp18 "${CONTAINER}:/home/demo"
echo 'See if the file are at the Docker image:'
#docker exec -i 306468777bc5 /bin/ls -l .
echo 'Run mcq'
echo 'done'
In [24]:
%%bash
# Run mcq-cli/local inside the container: the 18_Rh* files are the models (-m)
# and 18_0_solution_5TPY_rpr.pdb is the target (-t). The command stays one
# quoted string so the glob is expanded by the container's shell, not locally.
CONTAINER=306468777bc5
MCQ_CMD="/home/demo/rna-puzzles-toolkit/mcq-cli/local -m /home/demo/rp18/18_Rh* -t /home/demo/rp18/18_0_solution_5TPY_rpr.pdb"
docker exec -i "${CONTAINER}" sh -c "${MCQ_CMD}"
In [26]:
%%bash
# List /tmp inside the container, newest entries first (-t).
docker exec -i 306468777bc5 sh -c "ls -lt /tmp"
Generate a plot based on the table.
In [48]:
%%bash
# Run colorbars.R on table.csv from inside the run directory in the container.
# Both the container id and the run directory name are specific to one session.
CONTAINER=306468777bc5
RUN_DIR=/tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c
# "~" is left unexpanded by the local shell (it is not at the start of a word)
# and is resolved by the container's sh.
docker exec -i "${CONTAINER}" sh -c "cd ${RUN_DIR}/ && ~/rna-puzzles-toolkit/mcq-cli/colorbars.R table.csv"
In [49]:
%%bash
# List the contents of the run directory inside the container.
docker exec -i 306468777bc5 sh -c "ls /tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c/"
Copy the results to your local temp.
In [50]:
%%bash
# Copy the run directory from the container to the local /tmp and list it.
CONTAINER=306468777bc5
RUN_DIR=/tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c
docker cp "${CONTAINER}:${RUN_DIR}" /tmp/
ls "${RUN_DIR}"
View the results using Pandas.
In [51]:
import pandas as pd

# Load the table copied out of the container and display it as the cell
# output. The path is specific to one run.
table = pd.read_csv(
    filepath_or_buffer='/tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c/table.csv',
)
table
Out[51]:
In [52]:
%%bash
# Copy the delta.svg plot of model 18_Rhiju_1_rpr.pdb into the current directory.
# NOTE(review): the destination name "detla.svg" looks like a typo for
# "delta.svg"; it is kept as-is in case something else refers to it — confirm.
RUN_DIR=/tmp/99228ed0-d1af-4b60-9903-da360aa8ce1c
cp "${RUN_DIR}/18_Rhiju_1_rpr.pdb/delta.svg" detla.svg
RNAQUA (RNA QUality Assessment) is a web-service-based wrapper of basic RNA comparison metrics. It is a RESTful web service client developed in Java. The tool provides a set of web services initially designed for RNAssess (Lukasiak et al., Nucleic Acids Research, 2015;43(W1):W502-W506. doi:10.1093/nar/gkv557) to support the quality assessment of RNA 3D structures. RNAQUA requires a stable release of JRE 7 (or later) installed on the user's workstation, together with internet access. The full implementation of the tool is stored on GitHub and maintained by Antczak. The binaries and use cases can be downloaded from here. The most important features of RNAQUA:
This metric tool is maintained by Antczak.
DEMO
Run rnaqua using Docker.
In [6]:
%%bash
# Print the rnaqua help text (-h) from inside the toolkit container.
CONTAINER=rnapuzzles_toolkit_1
RNAQUA=/home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh
docker exec -i "${CONTAINER}" "${RNAQUA}" -h
In [20]:
%%bash
# Run the PDB-VALIDATION command on a directory of models inside the toolkit
# container, write the report to /tmp/validation.xml there, then print it.
CONTAINER=rnapuzzles_toolkit_1
RNAQUA=/home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh
MODELS_DIR=rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/PDB-VALIDATION/models/
REPORT=/tmp/validation.xml
docker exec -i "${CONTAINER}" "${RNAQUA}" \
    --command PDB-VALIDATION \
    --multiple-models-directory-path "${MODELS_DIR}" \
    --output-file-path "${REPORT}"
docker exec -i "${CONTAINER}" cat "${REPORT}"
In [21]:
%%bash
# Print /tmp/validation.xml from inside the toolkit container.
docker exec -i rnapuzzles_toolkit_1 cat /tmp/validation.xml
In [25]:
%%bash
# Run the PDB-VALIDATION command on a single model file inside the toolkit
# container and write the report to /tmp/validation.xml there.
CONTAINER=rnapuzzles_toolkit_1
RNAQUA=/home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh
MODEL=rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/PDB-VALIDATION/models/model.pdb
docker exec -i "${CONTAINER}" "${RNAQUA}" \
    --command PDB-VALIDATION \
    --single-model-file-path "${MODEL}" \
    --output-file-path /tmp/validation.xml
14_ChenPostExp_1_rpr.pdb:U_1,31|U_33,29
In [6]:
%%bash
# Compute the CLASH-SCORE of one model, using an alignment made of two
# substructures, and write the result to /tmp/14_ChenPostExp_1_rpr.xml
# inside the toolkit container.
CONTAINER=rnapuzzles_toolkit_1
RNAQUA=/home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh
CURRENT_DIR="rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/SCORES/CLASH-SCORE"
docker exec -i "${CONTAINER}" "${RNAQUA}" \
    --command CLASH-SCORE \
    --single-model-file-path "${CURRENT_DIR}/incontinuous-models/14_ChenPostExp_1_rpr.pdb" \
    --alignment "14_ChenPostExp_1_rpr.pdb:U_1,31|U_33,29" \
    --output-file-path /tmp/14_ChenPostExp_1_rpr.xml
In [7]:
%%bash
# Print the clash-score report from inside the toolkit container.
# (The CURRENT_DIR variable that used to be set here was never used.)
docker exec -i rnapuzzles_toolkit_1 cat /tmp/14_ChenPostExp_1_rpr.xml
# Output recorded from an earlier run:
#
#<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
#<measureScores><structure><description><filename>14_ChenPostExp_1_rpr.pdb</filename><errors/></description><score>2.06</score></structure></measureScores>
#
#
ORIGINAL-3D and RENUMERATED-3D
An extraction (ORIGINAL-3D) and unification (RENUMERATED-3D) of RNA 3D model(s) or a set of incontinuous 3D substructures specified by the user which are additionally superimposed over the corresponding 3D structure/substructures of the reference. At the output, ZIP archive including the coordinates of the reference structure as well as all considered RNA 3D model(s) is returned.
Alignment
To ensure robustness of quality assessment process a user can specify the appropriate alignment (-a,--alignment) between the reference 3D structure and all analyzed RNA 3D model(s) which often differ slightly in sequence, distribution of chains or numbering of residues.
An example of alignment prepared between the reference structure (solution.pdb) and a single RNA 3D model (model.pdb) is presented below. This alignment considers two incontinuous 3D substructures. Moreover, there is also incompatibility of chain id between compared 3D structures.
Each substructure is described by the id of its first residue [i.e., chain id + '_' + residue serial number + '_' + insertion code (if needed)] and its length; for example, 'U_1,31' denotes the substructure of length 31 that starts at residue 1 of chain U. To integrate many 3D substructures within a single alignment prepared for the particular RNA 3D structure(s), their descriptions are separated by '|'. Alignments prepared for the reference structure as well as the analyzed RNA 3D models, combined into a single string, are separated by ';'. The alignment prepared for the reference structure should always be included at the beginning of this string.
In [3]:
%%bash
# Run the RENUMERATED-3D command on a directory of models with the given
# alignment, and write the resulting ZIP archive to /home/demo inside the
# toolkit container.
CONTAINER=rnapuzzles_toolkit_1
RNAQUA=/home/demo/rna-puzzles-toolkit/rnaqua-binary/bin/rnaqua.sh
CURRENT_DIR="rna-puzzles-toolkit/rnaqua-binary/use-cases/SINGLE-STRUCTURE-ANALYSIS/STRUCTURE-3D/RENUMERATED-3D/"
ALIGNMENT="14_ChenPostExp_1_rpr.pdb:U_1,31|U_33,29"
docker exec -i "${CONTAINER}" "${RNAQUA}" \
    --command RENUMERATED-3D \
    --multiple-models-directory-path "${CURRENT_DIR}/models" \
    --alignment "${ALIGNMENT}" \
    --output-file-path "/home/demo/14_ChenPostExp_1_rpr.zip"
In [ ]:
@inprogress