In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
%matplotlib inline
plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(0, '/Users/weilu/openmmawsem/')
from openmmawsem import *
In [2]:
os.chdir("/Users/weilu/openmmawsem/_local/from_lammps_to_openmm/")
In [6]:
simulation_platform = "OpenCL" # OpenCL, CUDA, CPU, or Reference
pdb_id = '1r69'
pdb = f"{pdb_id}.pdb"
chain='T'
# download(pdb_id)
os.system("cp /Users/weilu/opt/parameters/globular_parameters/burial_gamma.dat .")
os.system("cp /Users/weilu/opt/parameters/globular_parameters/gamma.dat .")
input_pdb_filename, cleaned_pdb_filename = prepare_pdb(pdb, chain)
# ensure_atom_order(input_pdb_filename)
# getSeqFromCleanPdb(input_pdb_filename, chains='A')
In [ ]:
ensure_atom_order(input_pdb_filename)
In [158]:
pwd
Out[158]:
In [13]:
a = open(pdb).read().split("END")
In [143]:
os.system("rm openmmMovie.pdb")
os.system(f"echo 'REMARK converted from awsem lammps output' >> openmmMovie.pdb")
for i in range(30):
with open("tmp.pdb", "w") as out:
out.write(a[i])
input_pdb_filename, cleaned_pdb_filename =prepare_pdb("tmp.pdb", chain)
os.system(f"echo 'MODEL {i+1}' >> openmmMovie.pdb")
os.system("cat tmp-openmmawsem.pdb >> openmmMovie.pdb")
In [159]:
pdb_trajectory = read_trajectory_pdb_positions("openmmMovie.pdb")
oa = OpenMMAWSEMSystem(input_pdb_filename, k_awsem=1.0, xml_filename="../../awsem.xml") # k_awsem is an overall scaling factor that will affect the relevant temperature scales
# apply forces
# forceGroupTable_Rev = {11:"Con", 12:"Chain", 13:"Chi", 14:"Excluded", 15:"Rama", 16:"Direct",
# 17:"Burial", 18:"Mediated", 19:"Fragment"}
forceGroupTable = {"Con":11, "Chain":12, "Chi":13, "Excluded":14, "Rama":15, "Direct":16,
"Burial":17, "Mediated":18, "Contact":18, "Fragment":19, "Total":list(range(11, 20)),
"Water":[16, 18], "Q":1}
forces = [
oa.q_value("crystal_structure-cleaned.pdb", "A"),
# oa.con_term(),
oa.chain_term(),
oa.chi_term(),
# oa.excl_term(),
oa.rama_term(),
oa.rama_proline_term(),
oa.direct_term(),
oa.burial_term(),
oa.mediated_term(),
# oa.fragment_memory_term(frag_location_pre="./")
]
oa.addForcesWithDefaultForceGroup(forces)
# start simulation
collision_rate = 5.0 / picoseconds
integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 2*femtoseconds)
simulation = Simulation(oa.pdb.topology, oa.system, integrator, Platform.getPlatformByName("OpenCL"))
In [160]:
showEnergy = ["Q", "Con", "Chain", "Chi", "Excluded", "Rama", "Water", "Burial", "Fragment", "Total"]
# print("Steps", *showEnergy)
print(" ".join(["{0:<8s}".format(i) for i in ["Steps"] + showEnergy]))
for step, pdb in enumerate(pdb_trajectory):
simulation.context.setPositions(pdb.positions)
e = []
for term in showEnergy:
if type(forceGroupTable[term]) == list:
g = set(forceGroupTable[term])
elif forceGroupTable[term] == -1:
g = -1
else:
g = {forceGroupTable[term]}
state = simulation.context.getState(getEnergy=True, groups=g)
termEnergy = state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
e.append(termEnergy)
# print(*e)
print(" ".join([f"{step:<8}"] + ["{0:<8.2f}".format(i) for i in e]))
# print(forceGroupTable[term], state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))
In [147]:
124+284+51+9-119-21-54-305
Out[147]:
In [149]:
284+51-117-21-54-305+127+9
Out[149]:
In [ ]:
In [89]:
for pdb in pdb_trajectory:
simulation.context.setPositions(pdb.positions)
state = simulation.context.getState(getEnergy=True, groups={2})
print(state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))
In [65]:
for pdb in pdb_trajectory:
simulation.context.setPositions(pdb.positions)
state = simulation.context.getState(getEnergy=True, groups={2})
print(state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))
In [ ]:
order_parameters_to_compute = {
# "Qvalue": oa.q_value(qvalue_reference_structure, qvalue_reference_chain, min_seq_sep=3, max_seq_sep=np.inf),
"Con": oa.con_term(),
"Chain": oa.chain_term(),
"Chi": oa.chi_term(),
}
In [15]:
a[1]
Out[15]:
In [1]:
input_pdb_filename = "/Users/weilu/Research/server/dec_2018/T0958_single_chain/openmm_T0958/T0958-openmmawsem.pdb"
In [60]:
data = pd.read_table(input_pdb_filename, sep="\s+", header=None, names=["_","i","type","res","chain","res_id", "x","y","z","_1","_2","_3"])
data = data.dropna().reset_index()
data["res_id"] = data["res_id"].apply(lambda x: int(x))
In [56]:
def compute_chi(data):
ca_all = data.query("type == 'CA'")[["x","y","z"]].values
cb_all = data.query("type == 'CB'")[["x","y","z"]].values
c_all = data.query("type == 'C'")[["x","y","z"]].values
n_all = data.query("type == 'N'")[["x","y","z"]].values
energy = 0
for i in range(len(n_all)):
ca = ca_all[i]
cb = cb_all[i]
c = c_all[i]
n = n_all[i]
chi0 = -0.71
k_chi = 60*4.184
r_ca_cb = cb-ca
r_c_ca = ca-c
r_ca_n = n-ca
norm_r_ca_cb = np.sum(r_ca_cb**2)**0.5
norm_r_c_ca = np.sum(r_c_ca**2)**0.5
norm_r_ca_n = np.sum(r_ca_n**2)**0.5
a = np.cross(-r_c_ca,r_ca_n)/norm_r_c_ca/norm_r_ca_n
chi = np.dot(a,r_ca_cb)/norm_r_ca_cb
dchi = chi - chi0
energy += k_chi*dchi*dchi
return energy
In [62]:
chosen = data.query("res != 'IGL' and res_id != 1 and res_id != 63")
compute_chi(chosen)/4.184
Out[62]:
In [61]:
data
Out[61]:
In [162]:
pdb_file = "/Users/weilu/openmmawsem/_local/run_openmm_awsem/crystal_structure-cleaned.pdb"
chain_name = "A"
min_seq_sep=3
max_seq_sep=np.inf
contact_threshold=0.8*nanometers
structure_interactions = []
parser = PDBParser()
structure = parser.get_structure('X', pdb_file)
chain = structure[0][chain_name]
residues = [x for x in chain]
for i, residue_i in enumerate(residues):
for j, residue_j in enumerate(residues):
ca_list = []
cb_list = []
atom_list_i = []
atom_list_j = []
if i-j >= min_seq_sep and i-j <= max_seq_sep: # taking the signed value to avoid double counting
ca_i = residue_i['CA']
ca_list.append(ca_i)
atom_list_i.append(ca_i)
ca_j = residue_j['CA']
ca_list.append(ca_j)
atom_list_j.append(ca_j)
if not residue_i.get_resname() == "GLY":
cb_i = residue_i['CB']
cb_list.append(cb_i)
atom_list_i.append(cb_i)
if not residue_j.get_resname() == "GLY":
cb_j = residue_j['CB']
cb_list.append(cb_j)
atom_list_j.append(cb_j)
for atom_i, atom_j in product(atom_list_i, atom_list_j):
r_ijN = abs(atom_i - atom_j)/10.0*nanometers # convert to nm
if r_ijN <= contact_threshold:
sigma_ij = 0.1*abs(i-j)**0.15 # 0.1 nm = 1 A
gamma_ij = 1.0
# if atom_i in ca_list:
# i_index = self.ca[i]
# if atom_i in cb_list:
# i_index = self.cb[i]
# if atom_j in ca_list:
# j_index = self.ca[j]
# if atom_j in cb_list:
# j_index = self.cb[j]
# structure_interaction = [i_index, j_index, [gamma_ij, r_ijN, sigma_ij]]
# structure_interactions.append(structure_interaction)
In [168]:
for i, residue_i in enumerate(residues):
print(residue_i['CA'])
In [5]:
a = getSeqFromCleanPdb("/Users/weilu/openmmawsem/_local/2fha_openmm/2fha/2fha-openmmawsem.pdb")
In [6]:
len(a)
Out[6]:
In [8]:
# determine number of rows to skip
skipRows = 0
found = False
with open(cleaned_pdb_filename) as f:
for line in f:
if len(line) > 5:
if line[:4] == "ATOM":
found = True
break
skipRows += 1
if not found:
print("No ATOM found")
a = pd.read_table(cleaned_pdb_filename, skiprows=skipRows, sep="\s+", names=["ATOM", "i", "Type", "Res", "Chain", "ResId", "x", "y", "z", "_", "_1", "_2"]).dropna()
# save chain seq to pdb.fasta
import textwrap
with open(fastaFile, "w") as out:
for chain in chains:
out.write(f">{pdb.upper()}:{chain.upper()}|PDBID|CHAIN|SEQUENCE\n")
threeLetterSeq = a.query(f"Chain == '{chain}' and Type == 'CA'")["Res"]
chain_seq = "".join([ThreeToOne[i] for i in threeLetterSeq])
out.write("\n".join(textwrap.wrap(chain_seq, width=80))+"\n")
threeLetterSeq = a.query("Type == 'CA'")["Res"]
seq = "".join([ThreeToOne[i] for i in threeLetterSeq])
In [11]:
parser = PDBParser()
In [30]:
input_pdb_filename = "/Users/weilu/openmmawsem/_local/2fha_openmm/2fha/2fha-openmmawsem.pdb"
chains = "A"
cleaned_pdb_filename = input_pdb_filename.replace("openmmawsem.pdb", "cleaned.pdb")
pdb = input_pdb_filename.replace("-openmmawsem.pdb", "")
fastaFile = pdb + ".fasta"
ThreeToOne = {'ALA':'A','ARG':'R','ASN':'N','ASP':'D','CYS':'C','GLU':'E','GLN':'Q','GLY':'G','HIS':'H',
'ILE':'I','LEU':'L','LYS':'K','MET':'M','PHE':'F','PRO':'P','SER':'S','THR':'T','TRP':'W',
'TYR':'Y','VAL':'V'}
s = parser.get_structure("X", cleaned_pdb_filename)
m = s[0] # model 0
seq = ""
with open(fastaFile, "w") as out:
for chain in chains:
out.write(f">{pdb.upper()}:{chain.upper()}|PDBID|CHAIN|SEQUENCE\n")
c = m[chain]
chain_seq = ""
for residue in c:
residue_name = residue.get_resname()
chain_seq += ThreeToOne[residue_name]
out.write("\n".join(textwrap.wrap(chain_seq, width=80))+"\n")
seq += chain_seq
In [31]:
In [32]:
len(seq)
Out[32]:
In [33]:
seq
Out[33]:
In [10]:
pd.read_table(cleaned_pdb_filename, skiprows=skipRows, sep="\s+", names=["ATOM", "i", "Type", "Res", "Chain", "ResId", "x", "y", "z", "_", "_1", "_2"])
Out[10]:
In [9]:
a
Out[9]:
In [ ]: