In [1]:
import sys
# Add candidate locations of local source checkouts to the module search path
# so that the project imports further down in this cell can be resolved.
# Entries that do not exist on the current machine are ignored by the import system.
# NOTE(review): these look like per-machine checkouts (cluster / macOS / Windows),
# and one entry ends in "openmmawsem" while the others use "openawsem" — confirm
# which paths are still needed.
sys.path.append("/projects/pw8/wl45/open3spn2")
sys.path.append("/projects/pw8/wl45/openawsem")
sys.path.append("/Users/weilu/open3spn2")
sys.path.append("/Users/weilu/openmmawsem")
sys.path.append("C:/Users/luwei/Documents/GitHub/open3spn2")
sys.path.append("C:/Users/luwei/Documents/GitHub/openawsem")
import open3SPN2
import ffAWSEM
import time
import simtk.openmm
import simtk.openmm.app
In [10]:
import scipy.spatial.distance as sdist
import pandas as pd
In [3]:
# Input files: the structure handed to fromCoarsePDB and the matching sequence file.
pdb_file = "/Users/weilu/Research/server/jun_week1_2020/protein_DNA_benchmark/DNAProtein_Platform_OpenCL_date_20200226_pdb_1a36_repetition_0_clean.pdb"
seq_file = "/Users/weilu/Research/server/jun_week1_2020/protein_DNA_benchmark/DNAProtein_Platform_OpenCL_date_20200226_pdb_1a36_repetition_0_protein.seq"

# Only the first line of the sequence file is used. Strip the line terminator
# (and any surrounding whitespace) so that nothing but the sequence characters
# is passed on as the protein sequence.
with open(seq_file) as ps:
    protein_sequence_one = ps.readline().strip()

# Fail early with a readable message instead of an IndexError or a confusing
# downstream error when the file is empty or starts with a blank line.
if not protein_sequence_one:
    raise ValueError(f"First line of {seq_file} contains no sequence")

# Build the protein object from the structure file and the sequence read above.
protein = ffAWSEM.Protein.fromCoarsePDB(pdb_file, sequence=protein_sequence_one)
In [6]:
# Short alias for the atom table of the loaded protein object.
# NOTE(review): `data` is rebound to an unrelated CSV table further down in this
# notebook, so cell execution order matters — confirm this is intended.
data = protein.atoms
In [27]:
# Residue names used to tell protein residues apart from DNA nucleotides
# in the atom table.
protein_resNames = ["NGP", "IGL", "IPR", "NTER", "CTER"]
DNA_resNames = ["DA", "DC", "DT", "DG"]

# Pick the rows with boolean masks instead of iterating over the table row by
# row; the resulting lists keep the row order of `data`.
is_dna_p = data["resname"].isin(DNA_resNames) & (data["name"] == "P")
is_protein_ca = data["resname"].isin(protein_resNames) & (data["name"] == "CA")

# group1_index: "serial" values of atoms named "P" that sit in a DNA residue.
group1_index = data.loc[is_dna_p, "serial"].tolist()
# group2_index: "serial" values of atoms named "CA" that sit in a protein residue.
group2_index = data.loc[is_protein_ca, "serial"].tolist()
In [11]:
#Calculate native distances
# Native distances between every CA atom and every P atom of the loaded structure.
atom_table = protein.atoms
CA_atoms = atom_table[atom_table["name"] == "CA"]
P_atoms = atom_table[atom_table["name"] == "P"]

xyz = ["x", "y", "z"]
# Dividing by 10 expresses the pairwise distances in nanometers.
pair_dist = sdist.cdist(CA_atoms[xyz], P_atoms[xyz]) / 10

# Wide table: one row per CA atom, one column per P atom.
d_sq = pd.DataFrame(pair_dist, index=CA_atoms.index, columns=P_atoms.index)

# Long table: one row per (Protein, DNA) atom pair together with its distance.
d = (
    d_sq.assign(Protein=d_sq.index)
    .melt(id_vars=["Protein"])
    .rename(columns={"variable": "DNA", "value": "distance"})
)
In [23]:
# Show the atom pairs whose distance is below 1 (same unit as the "distance" column).
d[d["distance"] < 1]
Out[23]:
In [17]:
# Distinct labels found in the "DNA" column of the long distance table.
pd.unique(d["DNA"])
Out[17]:
In [18]:
# Display (rows, columns) of the long distance table.
d.shape
Out[18]:
In [19]:
# Scratch calculation with hard-coded numbers.
# NOTE(review): presumably the row count of `d` divided by a number of DNA atoms;
# the origin of both literals is not visible here — confirm or derive them from `d`.
23142/40
Out[19]:
In [31]:
# Load the table of selected entries and draw a reproducible sample of six rows.
selected_csv = "/Users/weilu/Research/server/jun_week3_2020/protein_DNA/selected.csv"
data = pd.read_csv(selected_csv, index_col=0)
sampled = data.sample(n=6, random_state=28)

# Values of the "idcode" column for the sampled rows, as a plain list.
pdb_list = sampled["idcode"].tolist()
In [38]:
In [41]:
# Rebuild the list of "idcode" values from the sampled rows.
pdb_list = sampled["idcode"].tolist()
In [42]:
# Normalise every entry of pdb_list to lower case.
pdb_list = [code.lower() for code in pdb_list]
In [43]:
# Print the list so it can be copied out of the notebook.
print(pdb_list)
In [35]:
# Display a second reproducible draw of six rows, this time with seed 0.
data.sample(n=6, random_state=0)
Out[35]:
In [ ]: