In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
%matplotlib inline
In [2]:
#read in median distances for pairwise interactions (obtained from analysis of the pdb)
directory='/Users/weilu/opt/gremlin/'
distancesCACB=pd.read_table(directory+'CACBmediandist.dat', delim_whitespace=True, header=None)
distancesCACA=pd.read_table(directory+'CACAmediandist.dat', delim_whitespace=True, header=None)
distancesCBCB=pd.read_table(directory+'CBCBmediandist.dat', delim_whitespace=True, header=None)
distancesCACB.columns = ['i', 'j', 'dist']
distancesCACA.columns = ['i', 'j', 'dist']
distancesCBCB.columns = ['i', 'j', 'dist']
#if you want to filter the gremlin data, adjust the parameters below
filter_threshold=0.5
column=4
name='raptorX.5.'
In [3]:
pdbIDs=['2xov']
for pdbID in pdbIDs:
#make sure that there is a sequence file for the protein and the downloaded gremlin data in the proper directories
#read and parse raptorX contact prediction data
directory = "/Users/weilu/opt/gremlin/protein/" + pdbID + "/"
raptorX_file=directory+"raptor." + pdbID + ".dat"
count=0
pairs=[]
seq=''
with open(raptorX_file) as f:
for line in f:
count+=1
if count<6:
continue
elif line.split()[0]!='END' and len(line.split())==5:
pairs.append(line.split())
elif len(pairs)==0:
seq+=line.split()[0]
n=len(seq)
# print(n)
rnative_matrixCACB=np.ones([n,n])*99
rnative_matrixCACA=np.ones([n,n])*99
rnative_matrixCBCB=np.ones([n,n])*99
for pair in pairs:
i=int(pair[0])
j=int(pair[1])
irestype=seq[i-1]
jrestype=seq[j-1]
if float(pair[column]) > filter_threshold:
if sum((distancesCACB['i']==irestype)&(distancesCACB['j']==jrestype))>0: #check if pair is in correct order
well_centerCACB = distancesCACB[(distancesCACB['i']==irestype)&(distancesCACB['j']==jrestype)]['dist'].values[0]
well_centerCACA = distancesCACA[(distancesCACA['i']==irestype)&(distancesCACA['j']==jrestype)]['dist'].values[0]
well_centerCBCB = distancesCBCB[(distancesCBCB['i']==irestype)&(distancesCBCB['j']==jrestype)]['dist'].values[0]
else:
well_centerCACB = distancesCACB[(distancesCACB['i']==jrestype)&(distancesCACB['j']==irestype)]['dist'].values[0]
well_centerCACA = distancesCACA[(distancesCACA['i']==jrestype)&(distancesCACA['j']==irestype)]['dist'].values[0]
well_centerCBCB = distancesCBCB[(distancesCBCB['i']==jrestype)&(distancesCBCB['j']==irestype)]['dist'].values[0]
rnative_matrixCACB[i-1, j-1] = well_centerCACB
rnative_matrixCACB[j-1, i-1] = well_centerCACB
rnative_matrixCACA[i-1, j-1] = well_centerCACA
rnative_matrixCACA[j-1, i-1] = well_centerCACA
rnative_matrixCBCB[i-1, j-1] = well_centerCBCB
rnative_matrixCBCB[j-1, i-1] = well_centerCBCB
plt.matshow(rnative_matrixCACB)
directory = "/Users/weilu/opt/gremlin/protein/" + pdbID + "/raptor/"
os.system("mkdir " + directory)
np.savetxt(directory + 'go_rnativeCACB.dat', rnative_matrixCACB, fmt='%10.5f')
np.savetxt(directory + 'go_rnativeCACA.dat', rnative_matrixCACA, fmt='%10.5f')
np.savetxt(directory + 'go_rnativeCBCB.dat', rnative_matrixCBCB, fmt='%10.5f')
In [ ]: