In [2]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.PDBParser import PDBParser
# from small_script.myFunctions import *
# from pyCodeLib import *
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [3]:
plt.rcParams['figure.figsize'] = [16.18033, 10] #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
In [21]:
def getSeqFromFasta(fastaFile):
# "/Users/weilu/Research/server/aug_2019/hybrid_protein_simulation/setup/1pv6/1pv6.fasta"
with open(fastaFile) as f:
a = f.readlines()
seq = ""
for line in a:
if line[0] == ">":
continue
seq += line.strip()
return seq
def getContactMapFromPDB(pdbFile, n):
# n = 472
# pdbFile = "/Users/weilu/Research/server/oct_2019/draw_contact_for_DMP/5xr8_clean.pdb"
cutoff = 9.5
MAX_OFFSET = 0
parser = PDBParser()
structure = parser.get_structure('target', pdbFile)
contact_table = np.ones((n,n)) * 99
all_residues = list(structure.get_residues())
for idx_i, res1 in enumerate(all_residues):
i = res1.get_id()[1] - 1
for idx_j, res2 in enumerate(all_residues):
j = res2.get_id()[1] - 1
contact_table[i][j] = res1["CA"]-res2["CA"]
data = (contact_table < cutoff)
remove_band = np.eye(n)
for i in range(1, MAX_OFFSET):
remove_band += np.eye(n, k=i)
remove_band += np.eye(n, k=-i)
data[remove_band==1] = 0
return data
def get_contactFromDMP(fileLocation, n, threshold=0.2):
a = np.zeros((n,n))
c_list = []
with open(fileLocation, "r") as f:
# for i in range(9):
# next(f)
for line in f:
# print(line)
try:
i,j,_,_,_,p = line.split(" ")
# print(i,j,p)
a[int(i)-1,int(j)-1] = float(p)
a[int(j)-1,int(i)-1] = float(p)
if float(p) > threshold:
c_list.append([int(i),int(j),float(p)])
except Exception as e:
print(e)
pass
return a, np.array(c_list)
In [25]:
fastaFile = "/Users/weilu/Research/server/jun_week1_2020/no_DNA_protein_DNA_benchmark/setups/1a36/1a36.fasta"
seq = getSeqFromFasta(fastaFile)
n = len(seq)
pdbFile = "/Users/weilu/Research/server/jun_week1_2020/no_DNA_protein_DNA_benchmark/setups/1a36/1a36.pdb"
data = getContactMapFromPDB(pdbFile, n)
fileLocation = "/Users/weilu/Research/server/jun_week1_2020/no_DNA_protein_DNA_benchmark/DMP/1a36/1a36.deepmetapsicov.con"
a, _ = get_contactFromDMP(fileLocation, n)
In [8]:
In [45]:
plt.rcParams['figure.figsize'] = 1.5*np.array([16.18033, 10]) #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
In [56]:
np.tri(n, k=-2)
Out[56]:
In [60]:
# data_s = data.astype(float)
threshold=0.5
native = data.astype(float)
predicted = (a>threshold).astype(float)
combined = native + predicted * 2
upper = combined * np.tri(n, k=-6)
# data_s = data.astype(float)
# t_s = (a>0.5).astype(float)
# combined = data_s + t_s * 2
combined = native + predicted * 2
lower = combined * (1 - np.tri(n, k=6))
combined = upper + lower
from matplotlib import colors
# red is in crystal but not in predicted.
# blue is in predicted but not in crystal
# black is in both
cmap = colors.ListedColormap(['white', '#e6194B', '#4363d8', '#bfef45'])
cmap = colors.ListedColormap(['white', '#fabed4', '#42d4f4', '#000000'])
# cmap = colors.ListedColormap(['white', 'red', 'green', 'blue'])
# cmap = colors.ListedColormap(['white', '#bfef45', '#4363d8', '#e6194B'])
# cmap = colors.ListedColormap(['white', 'red', 'blue', '#bfef45'])
bounds=[-1,0.1, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)
plt.title("Red is only in crystal, Blue is only in Predicted(upper), DMP in lower part")
# plt.savefig("/Users/weilu/Research/server/oct_2019/draw_contact_for_DMP/contact.png")
Out[60]:
In [48]:
In [49]:
fileLocation = "/Users/weilu/Research/server/oct_2019/draw_contact_for_DMP/cannabinoid_receptor.deepmetapsicov.con"
a, _ = get_contactFromDMP(fileLocation, 472)
n = 472
pdbFile = "/Users/weilu/Research/server/oct_2019/draw_contact_for_DMP/5xr8_clean.pdb"
data = getContactMapFromPDB(pdbFile, n)
In [50]:
pdbFile = "/Users/weilu/Research/server/oct_2019/draw_contact_for_DMP/lastFrame.pdb"
lastFrame = getContactMapFromPDB(pdbFile, n)
In [ ]:
data_s = data.astype(float)
t_s = lastFrame.astype(float)
combined = data_s + t_s * 2
from matplotlib import colors
# red is in crystal but not in predicted.
# blue is in predicted but not in crystal
# black is in both
cmap = colors.ListedColormap(['white', 'red', 'blue', 'black'])
bounds=[-1,0.1, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)
plt.title("Red is only in crystal, Blue is only in Predicted")
# plt.savefig("/Users/weilu/Research/server/oct_2019/draw_contact_for_DMP/contact.png")
In [62]:
data_s = (a>0.5).astype(float)
t_s = lastFrame.astype(float)
combined = data_s + t_s * 2
from matplotlib import colors
# red is in crystal but not in predicted.
# blue is in predicted but not in crystal
# black is in both
cmap = colors.ListedColormap(['white', 'red', 'green', 'blue'])
# cmap = "Accent"
bounds=[-1,0.1, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)
plt.title("Red is only in crystal, Blue is only in Predicted")
# plt.savefig("/Users/weilu/Research/server/oct_2019/draw_contact_for_DMP/contact.png")
Out[62]:
In [80]:
Out[80]:
In [83]:
data_s = data.astype(float)
t_s = lastFrame.astype(float)
combined = data_s + t_s * 2
upper = combined * np.tri(n, k=-1)
data_s = data.astype(float)
t_s = (a>0.5).astype(float)
combined = data_s + t_s * 2
lower = combined * (1 - np.tri(n, k=0))
combined = upper + lower
from matplotlib import colors
# red is in crystal but not in predicted.
# blue is in predicted but not in crystal
# black is in both
cmap = colors.ListedColormap(['white', '#e6194B', '#4363d8', '#bfef45'])
# cmap = colors.ListedColormap(['white', 'red', 'blue', '#bfef45'])
bounds=[-1,0.1, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)
plt.title("Red is only in crystal, Blue is only in Predicted(upper), DMP in lower part")
plt.savefig("/Users/weilu/Research/server/oct_2019/draw_contact_for_DMP/contact.png")
In [52]:
data_s = data.astype(float)
t_s = (a>0.5).astype(float)
combined = data_s + t_s * 2
from matplotlib import colors
# red is in crystal but not in predicted.
# blue is in predicted but not in crystal
# black is in both
cmap = colors.ListedColormap(['white', 'red', 'blue', 'black'])
bounds=[-1,0.1, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)
plt.title("Red is only in crystal, Blue is only in Predicted")
# plt.savefig("/Users/weilu/Research/server/oct_2019/draw_contact_for_DMP/contact.png")
Out[52]:
In [34]:
from matplotlib import colors
# red is in crystal but not in predicted.
# blue is in predicted but not in crystal
# black is in both
cmap = colors.ListedColormap(['white', 'red', 'blue', 'black'])
bounds=[-1,0.1, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)
plt.title("Red is only in crystal, Blue is only in Predicted")
# plt.savefig("/Users/weilu/Research/server/oct_2019/draw_contact_for_DMP/contact.png")
In [ ]:
plt.imshow(a[300:,150:250], origin="bottom")
plt.colorbar()
In [9]:
plt.imshow(a)
Out[9]:
In [7]:
len(a[5])
Out[7]:
In [22]:
pdb = "4rws"
fileLocation = f"/Users/weilu/Research/raptorX/{pdb}/A.txt"
data = pd.read_csv(fileLocation, skiprows=6, sep="\s+", names=["i","j","s", "ss","p"]).dropna().reset_index(drop=True)
data["i"] = data["i"].astype(int)
data["j"] = data["j"].astype(int)
data_A = data
seq_A = getSeqFromRaptorXContact(fileLocation)
fileLocation = f"/Users/weilu/Research/raptorX/{pdb}/B.txt"
data = pd.read_csv(fileLocation, skiprows=6, sep="\s+", names=["i","j","s", "ss","p"]).dropna().reset_index(drop=True)
data["i"] = data["i"].astype(int)
data["j"] = data["j"].astype(int)
data_B = data
seq_B = getSeqFromRaptorXContact(fileLocation)
In [86]:
import textwrap
header = '''\
PFRMAT RR
TARGET {}
AUTHOR RaptorX-Contact
METHOD deep dilated residual networks (one variant of deep CNN). Consult jinboxu@gmail.com for details.
MODEL 1
'''
fileLocation = f"/Users/weilu/Research/raptorX/{pdb}/{pdb}.txt"
with open(fileLocation, "w") as out:
out.write(header.format(pdb))
out.write("\n".join(textwrap.wrap(seq, width=50))+"\n")
for index, d in data_A.iterrows():
# print(index)
i = int(d["i"])
j = int(d["j"])
p = round(d["p"], 8)
s = int(d["s"])
ss = int(d["ss"])
out.write(f"{i} {j} {s} {ss} {p}\n")
for index, d in data_B.iterrows():
# print(index)
i = int(d["i"]) + len(seq_A)
j = int(d["j"]) + len(seq_A)
p = round(d["p"], 8)
s = int(d["s"])
ss = int(d["ss"])
out.write(f"{i} {j} {s} {ss} {p}\n")
out.write("END\n")
In [80]:
for index, d in data_B.iterrows():
# print(index)
i = int(d["i"]) - 1 + len(seq_A)
j = int(d["j"]) - 1 + len(seq_A)
p = round(d["p"], 8)
s = int(d["s"])
ss = int(d["ss"])
In [84]:
data_A
Out[84]:
In [79]:
round(p, 8)
Out[79]:
In [23]:
seq = seq_A + seq_B
In [26]:
seq
Out[26]:
In [27]:
n = len(seq)
t = np.zeros((n,n))
for index, d in data_A.iterrows():
# print(index)
i = int(d["i"]) - 1
j = int(d["j"]) - 1
p = d["p"]
# print(i,j,p)
t[i,j] = p
t[j,i] = p
for index, d in data_B.iterrows():
# print(index)
i = int(d["i"]) - 1 + len(seq_A)
j = int(d["j"]) - 1 + len(seq_A)
p = d["p"]
# print(i,j,p)
t[i,j] = p
t[j,i] = p
In [28]:
plt.imshow((t>0.5).astype(float), origin="bottom")
plt.colorbar()
Out[28]:
In [29]:
def getSeqFromRaptorXContact(fileLocation):
with open(fileLocation) as f:
a = f.readlines()
i = 4
seq = ""
assert a[i] == "MODEL 1\n"
i += 1
while True:
line = a[i]
if line[0].isdigit():
break
i += 1
seq += line.strip()
# print(i)
return seq
def getContactMapFromPDB(pdbFile):
cutoff = 9.5
MAX_OFFSET = 6
parser = PDBParser()
structure = parser.get_structure('target', pdbFile)
all_residues = list(structure.get_residues())
n = len(all_residues)
contact_table = np.zeros((n,n))
print(pdb, n)
for i, res1 in enumerate(all_residues):
for j, res2 in enumerate(all_residues):
contact_table[i][j] = res1["CA"]-res2["CA"]
data = (contact_table < cutoff)
remove_band = np.eye(n)
for i in range(1, MAX_OFFSET):
remove_band += np.eye(n, k=i)
remove_band += np.eye(n, k=-i)
data[remove_band==1] = 0
return data
In [30]:
data = getContactMapFromPDB("/Users/weilu/Research/server/jul_2019/two_chains/cleaned_pdbs/4rws.pdb")
In [70]:
data_s = data.astype(int)
t_s = (t>0.55).astype(float)
In [72]:
fig, ax = plt.subplots()
# ax.imshow(data_s+1, origin="bottom")
ax.imshow(t, origin="bottom")
Out[72]:
In [60]:
combined = data_s + t_s * 2
In [63]:
from matplotlib import colors
cmap = colors.ListedColormap(['white', 'red', 'blue', 'black'])
bounds=[-1,0.1, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)
Out[63]:
In [87]:
pdbFile = "/Users/weilu/Research/server/jul_2019/two_chains/cleaned_pdbs/4rws.pdb"
parser = PDBParser()
structure = parser.get_structure('target', pdbFile)
In [88]:
model = structure[0]
In [90]:
a = list(model.get_chains())
In [92]:
c = a[0]
In [94]:
c.id
Out[94]:
In [95]:
c = "ALL"
In [99]:
"ABC" is not "ALL"
Out[99]:
In [93]:
c.get_id()
Out[93]:
In [55]:
fig, ax = plt.subplots()
ax.imshow(data_s+1, origin="bottom")
ax.imshow(t_s, origin="bottom")
Out[55]:
In [43]:
plt.imshow(data_s, origin="bottom")
Out[43]:
In [44]:
# plt.imshow(-data_s, origin="bottom")
plt.imshow(t_s, origin="bottom")
# plt.colorbar()
Out[44]:
In [16]:
346
Out[16]:
In [17]:
fileLocation = f"/Users/weilu/Research/raptorX/{pdb}/B.txt"
In [20]:
len(seq)
Out[20]:
In [18]:
pdb = "4rws"
fileLocation = f"/Users/weilu/Research/raptorX/{pdb}/A.txt"
data = pd.read_csv(fileLocation, skiprows=6, sep="\s+", names=["i","j","s", "ss","p"]).dropna().reset_index(drop=True)
data["i"] = data["i"].astype(int)
data["j"] = data["j"].astype(int)
n = int(info.query(f"Protein =='{pdb}'")["Length"])
t = np.zeros((n,n))
for index, d in data.iterrows():
# print(index)
i = int(d["i"]) - 1
j = int(d["j"]) - 1
p = d["p"]
# print(i,j,p)
t[i,j] = p
t[j,i] = p
In [ ]:
def getContactMap(pdb):
fileLocation = f"/Users/weilu/Research/raptorX/{pdb}/contactmap.txt"
data = pd.read_csv(fileLocation, skiprows=6, sep="\s+", names=["i","j","s", "ss","p"]).dropna().reset_index(drop=True)
data["i"] = data["i"].astype(int)
data["j"] = data["j"].astype(int)
n = int(info.query(f"Protein =='{pdb}'")["Length"])
t = np.zeros((n,n))
for index, d in data.iterrows():
# print(index)
i = int(d["i"]) - 1
j = int(d["j"]) - 1
p = d["p"]
# print(i,j,p)
t[i,j] = p
t[j,i] = p