In [2]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
from small_script.myFunctions import *
sys.path.insert(0, "/Users/weilu/openmmawsem")
from helperFunctions.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2

In [3]:
# Notebook-wide matplotlib defaults, applied through one validated update call.
plt.rcParams.update({
    'figure.figsize': np.array([16.18033, 10]),  # width:height ~ 1.618:1 (golden ratio)
    'figure.facecolor': 'w',
    'figure.dpi': 100,
    'font.size': 22,
})

In [35]:
# Collect every whitespace-separated token of the training-set file into one
# flat list (presumably PDB IDs, several per line -- confirm against the file).
fileName = "/Users/weilu/Research/optimization/chang_database/training_set.txt"
with open(fileName) as f:
    # Splitting the whole text on whitespace yields the same tokens, in the
    # same order, as splitting line by line and concatenating.
    pdb_list = f.read().split()

In [37]:
len(pdb_list)


Out[37]:
387

In [60]:
# Rebind pdb_list to the paths of all cleaned PDB files (it previously held
# the tokens read from training_set.txt).
# Imported here because the notebook's import cell never imports glob
# explicitly (presumably it only reached this cell through a star import).
import glob

# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the table built from this list reproducible across machines and re-runs.
pdb_list = sorted(glob.glob("/Users/weilu/Research/optimization/chang_database/cleaned_pdbs/*.pdb"))

In [43]:
# One Bio.PDB parser instance, reused for every file in the structure-reading loop.
parser = PDBParser()

In [61]:
# Build one [name, residue count] record per structure file in pdb_list.
info = []
for pdbFile in pdb_list:
    structure = parser.get_structure("X", pdbFile)
    # File name without its directory and without the last 4 characters
    # (the ".pdb" suffix of the globbed paths).
    protein_name = pdbFile.rsplit("/", 1)[-1][:-4]
    residue_count = sum(1 for _ in structure.get_residues())
    info.append([protein_name, residue_count])

In [71]:
# Write the current `data` frame to training_set.csv.
# NOTE(review): this cell sits above the cell that builds `data` (In [62]);
# on a fresh top-to-bottom run it raises NameError. Its execution count (71)
# shows it was run after that cell -- move it below when tidying the notebook.
# NOTE(review): index_label=0 presumably labels the index column "0" in the
# CSV header -- confirm this is intended rather than index=False.
data.to_csv("/Users/weilu/Research/optimization/chang_database/training_set.csv", index_label=0)

In [62]:
# Protein/Length table from the collected records, shortest first, with a
# fresh 0..n-1 index.
data = (
    pd.DataFrame(info, columns=["Protein", "Length"])
    .sort_values("Length")
    .reset_index(drop=True)
)

In [69]:
# Rows whose Length lies strictly between 70 and 150 (both bounds excluded).
length_in_range = (data["Length"] > 70) & (data["Length"] < 150)
specific_decoys = data[length_in_range]

In [75]:
specific_decoys


Out[75]:
Protein Length
26 1BAJ 71
27 1HOE 74
28 1HYP 75
29 1TIF 76
30 1VCC 77
31 1BY9 80
32 1BDO 80
33 451C 82
34 1CC5 83
35 1BB9 83
36 1PHT 83
37 1OPD 85
38 1A32 85
39 1PTF 87
40 1GVP 87
41 1CYO 88
42 1TIG 88
43 1CTJ 89
44 1FNA 91
45 1RZL 91
46 1WHO 94
47 2CBP 96
48 2ACY 98
49 1PLC 99
50 1BM8 99
51 1OPC 99
52 1PUC 101
53 3VUB 101
54 1TUL 102
55 1FMB 104
... ... ...
95 1CPQ 129
96 1PDO 129
97 3LZT 129
98 1MSC 129
99 1HMT 131
100 1HTP 131
101 1C52 131
102 1KUH 132
103 1CRB 134
104 1POC 134
105 1AQT 135
106 2END 137
107 5NUL 138
108 1PNE 139
109 1LCL 141
110 2SNS 141
111 1FLP 142
112 1TFE 142
113 1AX8 144
114 1PKP 145
115 1RSS 145
116 1JON 146
117 1VLS 146
118 1LBA 146
119 1ALY 146
120 1MBA 146
121 2HBG 147
122 1AKR 147
123 1OSA 148
124 1DIV 149

99 rows × 2 columns


In [64]:
data.hist("Length", bins=50)


Out[64]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a24f39c50>]],
      dtype=object)

In [63]:
data


Out[63]:
Protein Length
0 1AIE 31
1 1PPT 36
2 1AJJ 37
3 2ERL 40
4 4HB1 44
5 1BHP 45
6 1CBN 46
7 1PTQ 50
8 1BX7 51
9 1RB9 52
10 1MOF 53
11 2FDN 55
12 2KNT 58
13 1FXD 58
14 1NKD 59
15 1VIE 60
16 1TUD 60
17 4MT2 61
18 2IGD 61
19 1FAS 61
20 1AHO 64
21 2SN3 65
22 1MSI 66
23 1MJC 69
24 1AIL 70
25 1UTG 70
26 1BAJ 71
27 1HOE 74
28 1HYP 75
29 1TIF 76
... ... ...
349 1AB4 493
350 1A8H 500
351 1DPE 507
352 1TDJ 510
353 1BFD 523
354 1USH 525
355 5EAU 528
356 1DDT 535
357 1CTN 538
358 1AYL 540
359 1LCI 541
360 1TYV 554
361 1UOK 558
362 1VNS 574
363 1CIY 577
364 1XWL 580
365 1ECL 589
366 1CII 602
367 1SQC 619
368 1FCE 629
369 1GOF 639
370 1PJR 651
371 1ACC 722
372 1VOM 746
373 1BF2 750
374 1KIT 757
375 1DMR 779
376 1YGE 839
377 1FIY 881
378 1KCW 1040

379 rows × 2 columns


In [3]:
# Load tabularResults.csv into `data`, rebinding the name used for the
# Protein/Length table earlier in the notebook.
# NOTE(review): in the visible cells this table is only cleaned and its columns
# listed before `data` is rebound again to the CATH domain list.
data = pd.read_csv("/Users/weilu/Research/database/tabularResults.csv")

In [4]:
# Drop every row that has at least one missing value, then renumber the index.
# Not idempotent with respect to the raw table: the unfiltered frame is lost.
data = data.dropna().reset_index(drop=True)

In [5]:
data.columns


Out[5]:
Index(['PDB ID', 'Chain ID', 'Residue Count', 'CATH ID [Res Range]',
       'Cath Description', 'SCOP ID [Res Range]', 'Scop Domain', 'Scop Fold',
       'PFAM Acc [Res Range]', 'Pfam ID', 'Pfam Description', 'Chain Length',
       'Sequence and Secondary Structure'],
      dtype='object')

In [7]:
# Load the CATH domain list: a whitespace-delimited text file in which "#"
# starts a comment. The file has no header row, so the twelve column names
# are supplied here.
pre = "/Users/weilu/Research/server/dec_2019/orengoftp.biochem.ucl.ac.uk/cath/releases/latest-release/cath-classification-data/cath-domain-list.txt"
data = pd.read_csv(
    pre,
    comment="#",
    # Raw string: "\s" inside a normal string literal is an invalid escape
    # sequence and triggers a DeprecationWarning/SyntaxWarning.
    sep=r"\s+",
    names=["Protein", "C", "A", "T", "H", "S1", "S2", "S3", "S4", "S5", "L", "resolution"],
)

In [8]:
# Cast every column to str so the C/A/T/H levels can be joined with "." and
# compared against string literals in the following cells.
data = data.astype(str)

In [9]:
# Join the four hierarchy levels into a dotted code "C.A.T.H".
cath_code = data["C"]
for level in ("A", "T", "H"):
    cath_code = cath_code + "." + data[level]
data["CATH"] = cath_code

In [10]:
# Keep only the 1.10 domains (C is '1' and A is '10'), renumbering the rows.
is_1_10 = (data["C"] == "1") & (data["A"] == "10")
data = data[is_1_10].reset_index(drop=True)

In [11]:
data.query("CATH == '1.10.10.10'").query("Protein == '4yiiA00'")


Out[11]:
Protein C A T H S1 S2 S3 S4 S5 L resolution CATH
3720 4yiiA00 1 10 10 10 145 1 1 1 1 72 1.8 1.10.10.10

In [12]:
# Number of domains per CATH superfamily code, most populated first.
# Figures recorded by the author for this release:
#   - 1.10 contains 60694 domains
#   - 1.10 contains 1226 superfamilies
count_superfamilies = data["CATH"].value_counts()
#   - 292 superfamilies have more than 20 domains
#   - 87 superfamilies have more than 100 domains

In [13]:
# Superfamilies with more than 100 domains, and the rows of `data` that
# belong to one of them.
superfamilies = count_superfamilies[count_superfamilies > 100].index.tolist()
filtered_data = data[data["CATH"].isin(superfamilies)]

In [14]:
# read the list of domains that have structures

In [15]:
# Domain names that have structures, read as an array of strings from the
# S20 non-redundant list file.
s_list = np.loadtxt("/Users/weilu/Research/server/dec_2019/cath_dataset/cath-dataset-nonredundant-S20.list", dtype="str")

In [26]:
# Rebind s_list to the "S20Clean" variant of the list; the array loaded from
# the plain S20 file is discarded. The recorded lengths are 14433 (S20) and
# 14224 (S20Clean).
s_list = np.loadtxt("/Users/weilu/Research/server/dec_2019/cath_dataset/cath-dataset-nonredundant-S20Clean.list", dtype="str")

In [16]:
len(s_list)


Out[16]:
14433

In [27]:
len(s_list)


Out[27]:
14224

In [28]:
# Keep the filtered domains whose name appears in s_list, renumbering rows.
listed_in_s_list = filtered_data["Protein"].isin(s_list)
has_structures = filtered_data[listed_in_s_list].reset_index(drop=True)

In [29]:
len(has_structures)


Out[29]:
1019

In [30]:
# Column L was cast to str earlier; turn it back into integers so it can be
# compared numerically.
has_structures = has_structures.assign(L=has_structures["L"].astype(int))

In [31]:
# Domains with L below 200; the original row labels are kept (no reindexing).
has_structures_small = has_structures[has_structures["L"] < 200]

In [32]:
len(has_structures_small)


Out[32]:
911

In [33]:
# Save the L < 200 subset to CSV without writing the index column.
has_structures_small.to_csv(
    "/Users/weilu/Research/server/jan_2020/cath_dataset_shuffle_optimization/has_structures_small_dataset_cleaned.csv",
    index=False,
)

In [25]:
has_structures_small


Out[25]:
Protein C A T H S1 S2 S3 S4 S5 L resolution CATH
0 3b89A01 1 10 8 10 2 1 1 2 1 54 2.6 1.10.8.10
1 1oksA00 1 10 8 10 6 1 1 1 1 51 1.8 1.10.8.10
2 2ltuA00 1 10 8 10 7 2 1 1 1 62 999.0 1.10.8.10
3 1zv1A00 1 10 8 10 8 1 1 1 1 59 1.6 1.10.8.10
4 1cukA03 1 10 8 10 11 1 1 1 1 48 1.9 1.10.8.10
5 2oo9A00 1 10 8 10 13 2 1 1 1 44 2.1 1.10.8.10
6 3lcuA01 1 10 8 10 14 1 1 2 1 57 2.1 1.10.8.10
7 1p3qQ00 1 10 8 10 15 1 1 1 1 43 1.7 1.10.8.10
8 3bbzA00 1 10 8 10 16 1 1 1 1 48 2.1 1.10.8.10
9 1y8gA03 1 10 8 10 21 2 2 1 1 39 2.5 1.10.8.10
10 3ihpB05 1 10 8 10 27 1 1 1 1 54 2.8 1.10.8.10
11 1a5tA02 1 10 8 10 28 1 1 1 1 40 2.2 1.10.8.10
12 2hwjA02 1 10 8 10 30 1 1 1 1 59 2.61 1.10.8.10
13 2lbcA01 1 10 8 10 31 1 2 1 1 42 999.0 1.10.8.10
14 1tr8A02 1 10 8 10 32 1 1 1 1 37 2.27 1.10.8.10
15 4dbgB02 1 10 8 10 33 1 1 1 1 61 2.71 1.10.8.10
16 1g4aE02 1 10 8 10 35 1 1 1 1 47 3.0 1.10.8.10
17 1nv8B01 1 10 8 10 36 1 1 1 1 71 2.2 1.10.8.10
18 1bvsF03 1 10 8 10 37 1 1 1 1 45 3.0 1.10.8.10
19 4dbgB03 1 10 8 10 39 1 1 1 1 38 2.71 1.10.8.10
20 4f1iA01 1 10 8 10 41 1 1 2 1 78 2.5 1.10.8.10
21 1g3iF02 1 10 8 10 43 1 2 1 1 23 3.41 1.10.8.10
22 1q02A00 1 10 8 10 46 1 1 1 1 52 999.0 1.10.8.10
23 2mgwA00 1 10 8 10 49 1 1 1 1 52 999.0 1.10.8.10
24 1r4gA00 1 10 8 10 50 1 1 1 1 53 999.0 1.10.8.10
25 2knaA01 1 10 8 10 51 1 1 1 1 54 999.0 1.10.8.10
26 1otrA00 1 10 8 10 53 1 1 1 1 49 999.0 1.10.8.10
27 2jp7A00 1 10 8 10 55 1 1 1 1 57 999.0 1.10.8.10
28 2l4eA00 1 10 8 10 56 1 1 1 1 57 999.0 1.10.8.10
29 1pgyA00 1 10 8 10 57 1 1 1 1 47 999.0 1.10.8.10
... ... ... ... ... ... ... ... ... ... ... ... ... ...
987 1p94A00 1 10 1220 10 30 1 1 1 1 76 999.0 1.10.1220.10
995 1vq8200 1 10 1620 10 1 1 1 1 1 46 2.2 1.10.1620.10
996 1vq8P01 1 10 1650 10 1 1 1 1 1 55 2.2 1.10.1650.10
997 1m3qA02 1 10 1670 10 1 1 1 3 1 80 1.9 1.10.1670.10
998 1pu6A01 1 10 1670 10 2 1 1 1 1 93 1.64 1.10.1670.10
999 3kntC01 1 10 1670 10 3 2 1 2 1 95 2.7 1.10.1670.10
1000 1xg7B01 1 10 1670 10 7 1 1 1 1 96 1.88 1.10.1670.10
1001 1rrsA01 1 10 1670 10 12 1 2 3 1 111 2.4 1.10.1670.10
1002 1mpgA03 1 10 1670 10 13 1 1 1 1 52 1.8 1.10.1670.10
1003 4unfA01 1 10 1670 10 14 1 1 1 1 119 2.15 1.10.1670.10
1004 4cxfA01 1 10 1740 10 1 2 1 1 1 87 1.75 1.10.1740.10
1005 3czgA01 1 10 1740 10 5 1 1 1 1 85 1.8 1.10.1740.10
1006 1jvsA03 1 10 1740 10 6 1 1 2 1 85 2.2 1.10.1740.10
1007 3ucqA01 1 10 1740 10 10 1 1 1 1 95 1.97 1.10.1740.10
1008 1rp3A01 1 10 1740 10 11 1 1 1 1 85 2.3 1.10.1740.10
1009 4nqwA01 1 10 1740 10 12 1 1 1 1 84 2.4 1.10.1740.10
1010 3t0yA01 1 10 1740 10 15 1 1 1 1 65 2.1 1.10.1740.10
1011 1h19A02 1 10 1740 60 1 1 1 3 1 97 2.1 1.10.1740.60
1012 2gtqA02 1 10 1740 60 2 3 8 1 1 80 2.05 1.10.1740.60
1013 2q6fB03 1 10 1840 10 3 1 1 1 1 100 2.0 1.10.1840.10
1018 3kh1A00 1 10 3210 10 7 1 1 1 1 192 1.37 1.10.3210.10
1020 5tk8A00 1 10 3210 10 13 1 1 1 1 191 1.64 1.10.3210.10
1021 4n6wA00 1 10 3210 10 19 1 1 3 1 180 1.85 1.10.3210.10
1022 2ogiA00 1 10 3210 10 20 1 1 2 1 190 1.85 1.10.3210.10
1024 2pgsA01 1 10 3210 10 23 2 1 1 1 172 2.35 1.10.3210.10
1025 1yoyA00 1 10 3210 10 24 1 1 2 1 146 2.0 1.10.3210.10
1027 3hi0A03 1 10 3210 10 27 1 1 1 1 199 2.3 1.10.3210.10
1028 1vj7B01 1 10 3210 10 29 1 1 1 1 173 2.1 1.10.3210.10
1029 2floA03 1 10 3210 10 30 1 1 2 1 141 2.2 1.10.3210.10
1030 2f6sA00 1 10 3290 10 1 2 1 1 1 176 2.5 1.10.3290.10

925 rows × 13 columns


In [75]:
# One representative per CATH code: the first row of each group, in the
# frame's existing row order, then renumbered from 0.
first_per_cath = has_structures_small.groupby("CATH").head(1)
for_optimization = first_per_cath.reset_index(drop=True)

In [76]:
for_optimization["L"].astype(int).hist(bins=100)


Out[76]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a32047198>

In [78]:
# Save the per-superfamily representatives. The index is written as the first
# column, which is why the file is read back with index_col=0.
for_optimization.to_csv("/Users/weilu/Research/server/dec_2019/optimization_database/for_optimization_dataset.csv")

In [80]:
# Read the saved dataset back (first CSV column restored as the index) and
# display its Protein column as a plain list.
# NOTE(review): this rebinds `data`, which previously held the 1.10 CATH table.
data = pd.read_csv("/Users/weilu/Research/server/dec_2019/optimization_database/for_optimization_dataset.csv", index_col=0)
data["Protein"].to_list()

In [85]:
a = for_optimization["Protein"].to_list()

In [89]:
print(sorted(a))


['132lA00', '1a75A00', '1b82A02', '1b8fA01', '1bekA01', '1bqbA02', '1cpcB00', '1cscA02', '1cy5A00', '1dv5A00', '1e8yA05', '1evyA02', '1f44A01', '1fkaG00', '1fx0B03', '1g1xB00', '1gteA02', '1gu2A00', '1h19A02', '1hk5A03', '1in4A03', '1kw4A00', '1kx3D00', '1l1fA03', '1m1qA00', '1m3qA02', '1mu5A02', '1qwvA00', '1vq8200', '1vq8P01', '1xb0B00', '1xjtA00', '1xmkA00', '1z0xA01', '1zcaA02', '2cpgB00', '2f6sA00', '2grhA00', '2ii2A04', '2ptsA03', '2q6fB03', '2uxwA02', '2vqeD01', '2vyrA00', '2wh6A00', '3b89A01', '3g0vA00', '3geuA00', '3h7iA02', '3h99A03', '3hrdD02', '3ju5A01', '3k06B00', '3kh1A00', '3nasA02', '3oc7A02', '3p1wA03', '3pyoY00', '3qyhB01', '3tjtA01', '3vgjA02', '3ziaG01', '4annA00', '4bcmD01', '4cxfA01', '4du6E01', '4edmA00', '4fppB01', '4i2aA01', '4i4tB03', '4i6uB00', '4jgsE00', '4kbuK02', '4n73A00', '5fpoA01', '5hgoA01', '5kn9A02']

In [87]:
len(a)


Out[87]:
77

In [56]:
len(s_list)


Out[56]:
14433

In [50]:


In [39]:
data.shape


Out[39]:
(60694, 13)

In [99]:
# Load data_info_3.csv from the multi_seq_Pfam directory, using the first CSV
# column as the index. Rebinds `data` once more; the cells that follow use its
# Length, Problematic, FullName and SeqLength columns.
data = pd.read_csv("/Users/weilu/Research/server/jan_2020/multi_seq_Pfam/data_info_3.csv", index_col=0)

In [100]:
data.shape


Out[100]:
(1829, 6)

In [103]:
data.hist("Length", bins=50)


Out[103]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a705aa828>]],
      dtype=object)

In [109]:
# Entries with Length below 300 whose Problematic flag is not 4, renumbered.
d = data.query("Length < 300 and Problematic != 4").reset_index(drop=True)

In [118]:
def get_MSA_size(location):
    """Return the number of lines in the text file at ``location``.

    The notebook applies this to ``*_filtered_0.05.seqs`` alignment files
    (presumably one sequence per line -- confirm against the file format).

    Parameters
    ----------
    location : str or path-like
        Path of the file whose lines are counted.

    Returns
    -------
    int
        Line count; 0 for an empty file. A last line without a trailing
        newline is still counted.

    Raises
    ------
    OSError
        Propagated from ``open`` when the file cannot be opened.
    """
    with open(location) as f:
        # Stream the file instead of materialising every line with
        # readlines(): same count, constant memory.
        return sum(1 for _ in f)

In [117]:
# Spot check of get_MSA_size on a single alignment file (recorded result: 2475).
location = "/Users/weilu/Research/server/jan_2020/multi_seq_Pfam/alignments/1Y7QB_42-131_filtered_0.05.seqs"
get_MSA_size(location)


Out[117]:
2475

In [123]:
# Line count of each entry's filtered alignment file, stored as MSA_size.
# Note: `pre` is rebound here; earlier it held the CATH domain-list path.
pre = "/Users/weilu/Research/server/jan_2020/multi_seq_Pfam/alignments"
d["MSA_size"] = d["FullName"].map(
    lambda full_name: get_MSA_size(f"{pre}/{full_name}_filtered_0.05.seqs")
)

In [124]:
d["Length"].hist(bins=50)


Out[124]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a6a2adba8>

In [126]:
# Signed difference between the SeqLength and Length columns.
d["SeqLength_diff"] = d["SeqLength"].sub(d["Length"])

In [132]:
d.shape


Out[132]:
(1639, 8)

In [135]:
# Entries whose SeqLength and Length differ by fewer than 5, renumbered.
nearly_equal_length = d["SeqLength_diff"].abs() < 5
d2 = d[nearly_equal_length].reset_index(drop=True)

In [140]:
d2.shape


Out[140]:
(1228, 8)

In [138]:
d2["MSA_size"].hist(bins=50)


Out[138]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a6a62b2b0>

In [141]:
# Save the selected entries; the index is written as the first CSV column.
d2.to_csv("/Users/weilu/Research/database/pfam_selected.csv")

In [144]:
# Reload the saved selection, restoring the first CSV column as the index.
d2 = pd.read_csv("/Users/weilu/Research/database/pfam_selected.csv", index_col=0)

In [77]:
# Open one PDB file with PDBFixer.
# NOTE(review): PDBFixer is not imported by name in the import cell;
# presumably it arrives through one of the star imports -- confirm.
# `fixer` is not used by any later cell in this notebook.
fromFile = "/Users/weilu/Research/server/feb_2020/jeff/feb04_2/original_pdbs/test10_selpro.pdb"
fixer = PDBFixer(filename=fromFile)

In [13]:
# Display the pickled table for 1a1x, iteration 0 (result is not bound to a name).
# NOTE: unpickling can execute arbitrary code -- only load pickle files you trust.
pd.read_pickle("/Users/weilu/Research/server/mar_2020/mass_iterative_optimization/optimization_iter0_com/decoys/openMM/1a1x_iteration_0.pkl")


Out[13]:
Steps Qw Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder structure
0 1952 0.28 16.38 115.38 -541.86 -256.61 -83.82 23.25 40.90 1.39 -90.52 -31.19 0.0 -823.07 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
1 1953 0.28 15.93 108.30 -537.23 -263.13 -79.74 34.59 41.74 3.04 -86.25 -32.08 0.0 -810.76 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
2 1954 0.28 16.38 125.86 -536.00 -260.48 -79.89 26.26 29.38 2.66 -90.38 -31.90 0.0 -814.49 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
3 1955 0.29 16.27 108.21 -535.76 -260.33 -83.02 29.98 30.83 4.81 -83.90 -30.37 0.0 -819.57 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
4 1956 0.29 16.38 114.33 -537.25 -265.41 -82.94 30.12 37.33 5.64 -84.92 -31.49 0.0 -814.58 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
5 1957 0.29 16.24 115.07 -538.25 -262.33 -82.59 26.73 35.34 5.60 -79.60 -29.21 0.0 -809.23 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
6 1958 0.28 16.10 103.80 -538.35 -265.36 -81.61 31.98 28.40 6.43 -85.96 -31.67 0.0 -832.35 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
7 1959 0.28 16.34 125.42 -536.45 -257.20 -81.81 26.03 36.87 6.21 -82.98 -31.36 0.0 -795.26 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
8 1960 0.28 16.56 115.60 -534.34 -262.67 -80.81 26.81 35.20 2.72 -79.91 -30.53 0.0 -807.92 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
9 1961 0.27 16.39 123.69 -540.54 -260.08 -80.59 28.94 37.83 4.79 -82.19 -30.24 0.0 -798.39 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
10 1962 0.27 16.57 128.17 -538.88 -255.19 -81.20 24.78 33.55 6.02 -87.43 -30.68 0.0 -800.86 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
11 1963 0.27 16.53 98.11 -539.14 -258.22 -81.69 27.91 37.39 4.01 -80.55 -31.84 0.0 -824.02 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
12 1964 0.28 16.17 115.56 -534.07 -256.83 -82.97 22.78 34.32 2.75 -81.20 -33.05 0.0 -812.70 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
13 1965 0.28 16.11 121.31 -538.02 -263.90 -82.25 26.42 31.60 2.11 -84.37 -31.68 0.0 -818.79 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
14 1966 0.27 16.21 123.17 -539.12 -265.22 -82.34 29.88 31.67 4.52 -84.84 -30.83 0.0 -813.10 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
15 1967 0.28 16.14 102.91 -538.77 -264.58 -81.11 28.94 34.01 2.38 -83.96 -30.63 0.0 -830.80 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
16 1968 0.28 16.21 113.21 -539.05 -263.12 -80.05 30.35 34.62 4.59 -84.61 -33.03 0.0 -817.11 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
17 1969 0.28 16.38 121.67 -539.51 -259.51 -81.53 24.27 36.89 3.86 -81.82 -30.89 0.0 -806.57 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
18 1970 0.27 16.33 118.83 -544.41 -257.67 -80.80 27.45 29.28 4.71 -83.61 -32.65 0.0 -818.88 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
19 1971 0.27 16.47 114.26 -531.71 -266.48 -83.53 23.19 36.16 4.78 -87.33 -29.28 0.0 -819.93 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
20 1972 0.27 16.69 112.66 -532.82 -263.63 -82.19 29.09 31.18 3.91 -78.67 -29.57 0.0 -810.05 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
21 1973 0.27 16.33 112.61 -542.59 -258.33 -80.21 29.67 42.25 3.74 -79.72 -31.59 0.0 -804.17 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
22 1974 0.28 16.27 111.65 -543.34 -254.50 -84.61 23.64 35.08 5.88 -88.39 -31.36 0.0 -825.93 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
23 1975 0.28 16.33 114.08 -538.72 -257.36 -82.70 29.24 35.45 5.46 -82.63 -29.95 0.0 -807.13 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
24 1976 0.28 16.40 126.41 -541.11 -268.21 -83.42 27.49 25.89 3.04 -87.21 -29.98 0.0 -827.09 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
25 1977 0.28 16.38 119.15 -541.82 -266.71 -82.94 26.79 35.97 2.86 -83.21 -31.09 0.0 -821.01 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
26 1978 0.28 16.15 126.16 -530.00 -260.71 -83.02 26.73 31.85 2.24 -83.92 -31.69 0.0 -802.35 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
27 1979 0.29 16.03 114.25 -535.64 -262.54 -83.83 32.56 25.82 3.79 -78.90 -32.78 0.0 -817.26 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
28 1980 0.28 16.32 113.13 -542.83 -259.90 -83.17 27.37 31.77 4.03 -83.65 -30.92 0.0 -824.17 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
29 1981 0.28 16.35 108.58 -535.76 -259.02 -79.95 24.85 32.39 3.20 -78.55 -33.37 0.0 -817.63 0 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
70 1972 0.30 15.56 117.15 -545.37 -232.95 -79.54 22.77 37.91 2.05 -99.76 -36.85 0.0 -814.59 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
71 1973 0.29 15.06 112.71 -538.72 -242.68 -76.83 22.57 36.53 3.76 -95.17 -34.99 0.0 -812.83 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
72 1974 0.29 15.15 116.88 -536.01 -237.82 -74.92 25.64 41.53 4.69 -98.98 -39.40 0.0 -798.41 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
73 1975 0.29 14.73 110.69 -544.25 -234.41 -75.79 23.45 29.99 4.45 -98.94 -39.76 0.0 -824.57 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
74 1976 0.29 14.77 109.69 -540.07 -249.37 -77.24 28.82 31.00 5.63 -96.06 -35.75 0.0 -823.35 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
75 1977 0.30 14.82 112.14 -545.12 -246.79 -75.94 27.60 31.36 3.26 -99.50 -35.70 0.0 -828.69 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
76 1978 0.30 15.05 104.85 -544.12 -246.75 -74.55 23.26 33.35 3.39 -92.83 -35.24 0.0 -828.65 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
77 1979 0.29 14.40 106.10 -538.87 -238.87 -75.44 23.94 31.08 1.96 -97.47 -32.22 0.0 -819.80 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
78 1980 0.29 14.46 107.99 -541.58 -240.90 -75.42 25.00 41.31 5.17 -94.71 -36.19 0.0 -809.32 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
79 1981 0.30 14.60 117.03 -545.49 -244.84 -78.72 26.88 37.48 3.08 -97.35 -36.41 0.0 -818.34 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
80 1982 0.31 14.93 114.95 -542.61 -245.99 -78.02 22.78 29.54 3.61 -92.96 -35.33 0.0 -824.02 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
81 1983 0.30 14.63 115.85 -541.57 -246.35 -76.37 31.46 46.90 4.76 -102.08 -33.83 0.0 -801.23 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
82 1984 0.29 14.88 103.78 -538.60 -235.82 -72.64 25.76 35.31 2.72 -103.76 -39.46 0.0 -822.71 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
83 1985 0.30 14.82 120.02 -540.05 -240.84 -75.68 23.90 34.32 6.83 -97.48 -38.92 0.0 -807.90 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
84 1986 0.29 15.17 107.70 -541.81 -240.61 -76.03 26.14 35.98 4.86 -98.66 -38.53 0.0 -820.95 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
85 1987 0.30 15.14 123.02 -546.00 -242.81 -75.74 23.68 32.70 4.30 -97.04 -38.17 0.0 -816.07 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
86 1988 0.30 15.01 102.88 -543.46 -240.04 -76.27 27.47 42.50 3.14 -100.65 -36.32 0.0 -820.75 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
87 1989 0.30 15.46 106.49 -544.33 -244.29 -74.91 23.07 29.72 5.68 -96.79 -34.23 0.0 -829.58 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
88 1990 0.30 15.48 108.04 -545.60 -230.78 -76.99 24.48 33.38 10.17 -98.22 -39.25 0.0 -814.77 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
89 1991 0.30 15.28 109.05 -543.42 -242.62 -78.86 25.93 37.84 5.28 -102.82 -35.43 0.0 -825.04 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
90 1992 0.31 15.57 113.04 -536.31 -235.47 -77.00 21.80 28.52 3.95 -97.71 -37.52 0.0 -816.69 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
91 1993 0.30 15.54 120.31 -540.12 -234.42 -75.10 23.73 26.86 3.28 -92.78 -36.62 0.0 -804.86 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
92 1994 0.30 15.77 112.79 -531.87 -244.56 -75.30 23.37 34.93 2.83 -93.13 -39.53 0.0 -810.46 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
93 1995 0.29 15.93 101.54 -542.05 -239.97 -74.11 20.66 41.73 5.94 -86.97 -36.84 0.0 -810.07 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
94 1996 0.29 15.93 120.18 -541.46 -239.34 -73.77 21.46 34.58 2.25 -94.14 -39.25 0.0 -809.49 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
95 1997 0.29 15.77 110.02 -543.52 -243.36 -73.84 28.22 27.17 3.77 -95.47 -39.91 0.0 -826.93 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
96 1998 0.29 15.87 108.01 -545.43 -239.90 -73.44 28.47 33.94 3.39 -96.32 -38.91 0.0 -820.18 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
97 1999 0.29 15.82 112.71 -544.04 -244.26 -77.21 22.30 31.79 3.64 -98.00 -37.06 0.0 -830.14 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
98 2000 0.29 16.02 109.64 -542.37 -240.94 -77.22 22.48 41.26 3.16 -96.75 -38.12 0.0 -818.87 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...
99 2001 0.28 16.03 113.90 -536.30 -232.76 -76.58 24.44 37.30 3.44 -93.48 -39.78 0.0 -799.82 1 1a1x iteration_0 (((<Atom CA>, <Atom C>, <Atom O>, <Atom CB>), ...

100 rows × 18 columns


In [ ]:


In [ ]:


In [15]:
ls


18-02-28 Parsing PFAM database.ipynb
CPU_openAWSEM_benchmark.ipynb
Contact_Optimization_feb26.ipynb
Contact_Optimization_mar02.ipynb
Contact_Optimization_mar26.ipynb
Cornichon.ipynb
Curate_optimization_dataset.ipynb
Cys_energy_term.ipynb
DMP_contact_visual_contact_map_oct04.ipynb
ER_rnative_visual.ipynb
ER_rnative_visual_Agu08.ipynb
Expand_contact_table.ipynb
Experiment_LSTM.ipynb
FourBody.ipynb
Group_Contact_Optimization_apr23.ipynb
HybridDatabase.ipynb
Hybrid_contact.ipynb
LSTM.ipynb
LSTM_test.ipynb
MembraneProteinDatabase.ipynb
MembraneProteinDatabase_largeData.ipynb
MembraneProteinDatabase_moredata.ipynb
MembraneProteinDatabase_oct16.ipynb
Membrane_contact.ipynb
Membrane_contact_Optimization_helperFunctions_Oct17.ipynb
Membrane_pore_and_disulfide_bond.ipynb
Multi_Seq.ipynb
Optimization_helperFunctions.ipynb
Optimization_helperFunctions_Sep20.ipynb
Optimization_helperFunctions_Sep23.ipynb
Optimization_helperFunctions_Sep24.ipynb
Optimization_helperFunctions_Sep25.ipynb
Optimization_helperFunctions_Sep26.ipynb
Optimization_helperFunctions_Sep27.ipynb
Optimization_helperFunctions_Sep27_clean.ipynb
Optimization_helperFunctions_Sep28.ipynb
Optimization_helperFunctions_setup-Copy1.ipynb
Optimization_helperFunctions_setup.ipynb
Optimization_helperFunctions_setup_clean.ipynb
Optimization_helperFunctions_setup_oct08.ipynb
Optimization_helperFunctions_setup_specific_decoy_structures_nov03.ipynb
PCA_exploration.ipynb
Select_frames_apr12.ipynb
Validate_hamiltonian.ipynb
Z_score_evaluation.ipynb
analyze_membrane_simulation_data_jun07.ipynb
analyze_simulation_data_apr17.ipynb
analyze_simulation_data_mar30.ipynb
analyze_simulation_data_may05.ipynb
analyze_simulation_data_nov18.ipynb
benchmark_side_chain_effect.ipynb
cb_cb_distance_database_check.ipynb
cleaned_pdbs/
com_com_exclude_volume_term_parameters.ipynb
combine_raptor_ER_map.ipynb
compute_energy.ipynb
contact_optimization_lagrange_multiplier.ipynb
convert_multi_chain_lammps_to_openmm.ipynb
cys_openMM_figures_plot.ipynb
cys_protein_simulation_analysis_aug13.ipynb
cys_protein_simulation_analysis_aug15.ipynb
cys_protein_simulation_analysis_nov12.ipynb
data_jan20.csv
diffusion_timescale.ipynb
diffusion_timescale_dec03.ipynb
direct_contact.pdf
domain_separation_and_recombine.ipynb
experiment_with_openMM.ipynb
extract_info_from_slurms_out_and_pdb_seqres.ipynb
family_fold_may11.ipynb
frag_predict_cluster.ipynb
frag_predict_cluster_feb26.ipynb
fragment_optimization.ipynb
fragment_optimization_data_analysis.ipynb
frags.mem
hamiltonian_averaging.ipynb
helperFunction_palyGround.ipynb
hybrid_simulation_analysis_aug05.ipynb
hybrid_simulation_analysis_aug11.ipynb
hybrid_simulation_analysis_aug12.ipynb
hybrid_simulation_analysis_aug18.ipynb
hybrid_simulation_analysis_dec07.ipynb
hybrid_simulation_analysis_jul01.ipynb
hybrid_simulation_analysis_jul12.ipynb
hybrid_simulation_analysis_jun29.ipynb
hybrid_simulation_analysis_nov25.ipynb
interaction_well.ipynb
iris.html
iter0.ipynb
iterative_optimization_simulations.ipynb
iterative_optimization_simulations_fragmemory.ipynb
mass_iterative_optimization_simulations_feb07.ipynb
max_Q_wat_frame_202.pdb
multi_chain_support.ipynb
multi_seq_iteration.ipynb
new_old_beta_prediction_comparision.ipynb
nov16_run_openmm.ipynb
nov17_lammps_to_openmm.ipynb
obsolete/
openAWSEM.ipynb
openAWSEM_benchmark.ipynb
openmmReading.ipynb
optimization.ipynb
optimization_dec09.ipynb
optimization_dec28.ipynb
optimization_feb07.ipynb
optimization_feb14.ipynb
optimization_z_score_feb06.ipynb
out.pdb
pap_term_python_with_abit_beta_term.ipynb
plot_explore.ipynb
porter5_ssweight.ipynb
protein_mediated.pdf
read_gamma.ipynb
read_pdbs.ipynb
read_topology_prediction.ipynb
real_contact_cbd_cbd_exclude_volume_term_parameters.ipynb
relative_k.ipynb
relative_k_nov04_clean.ipynb
relative_k_oct16.ipynb
relative_k_oct31.ipynb
represent_center_of_mass_of_side_chain.ipynb
represent_center_of_mass_of_side_chain_convert_to_openMM_feb15.ipynb
rewrite_phis_computation_reading_format.ipynb
secondary_prediction_software_test_netsurfp2.ipynb
shift_z_optimization.ipynb
simulation_analysis_membrane_oct19.ipynb
small_molecular.ipynb
structure_prediction_analysis_another_set_jan17.ipynb
structure_prediction_analysis_combined_mar31.ipynb
structure_prediction_analysis_combined_mar31_with_frag.ipynb
structure_prediction_analysis_membrane_protein_jun08.ipynb
structure_prediction_analysis_old_set.ipynb
structure_prediction_analysis_single_apr30.ipynb
structure_prediction_analysis_test_apr18.ipynb
structure_prediction_analysis_test_may06.ipynb
structure_prediction_analysis_test_set_mar29.ipynb
structure_prediction_frag_memory.ipynb
survey_density_info_for_database.ipynb
survey_density_info_for_database_jan31.ipynb
toLocation
topoogy_prediction_comparision_dec04.ipynb
understand_frag_memory_generation.ipynb
visual_check_of_all_19_amino_acid_gmm.ipynb
zimPosition

In [ ]: