In [80]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
from small_script.myFunctions import *
import feather
import Bio.PDB as bio
from sklearn.metrics import confusion_matrix
d3_to_index = bio.Polypeptide.d3_to_index # we may want to adjust this in the future.
three_to_one = bio.Polypeptide.three_to_one
one_to_index = bio.Polypeptide.one_to_index
%matplotlib inline
%load_ext autoreload
%autoreload 2
plt.rcParams['figure.figsize'] = [16.18033, 10]
In [2]:
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.datasets import imdb
from keras.utils import to_categorical
from keras.models import model_from_json
from keras.metrics import top_k_categorical_accuracy
max_features = 20
batch_size = 1024*2
maxlen = 9
n = int(1e4)
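A minimal sketch of the kind of model these imports and hyperparameters suggest. The real architecture is loaded from model.json below; the embedding size, dropout rate, LSTM width (guessed from the "lstm100" directory name), and `num_clusters` are placeholders, not values taken from the trained model.
In [ ]:
# Sketch only: the actual model comes from model.json / model.h5 further down.
num_clusters = 100  # assumption: the true number of fragment clusters is not shown here
sketch = Sequential()
sketch.add(Embedding(max_features, 128, input_length=maxlen))  # 20 amino-acid indices -> dense vectors
sketch.add(Bidirectional(LSTM(100)))                           # "lstm100" suggests 100 units
sketch.add(Dropout(0.5))
sketch.add(Dense(num_clusters, activation='softmax'))          # one probability per fragment cluster
sketch.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])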
In [ ]:
def getFrags(pdb):
    # Slide a 9-residue window along the sequence, assign each window to a fragment
    # cluster with the LSTM (global `loaded_model`), and write a fragment-memory
    # file with a fixed weight of 20 per entry.
    seqFile = f"/Users/weilu/Research/server/jan_2019/iterative_optimization_another_set/all_simulations/{pdb}/{pdb}/{pdb}.seq"
    with open(seqFile) as f:
        lines = f.readlines()
    a = lines[0].strip()
    all_frag = []
    for i in range(0, len(a)-8):
        frag = a[i:i+9]
        fragIndex = [one_to_index(x) for x in frag]  # one-letter code -> integer index
        all_frag.append(fragIndex)
    all_frag = np.array(all_frag)
    predicted_cluster = loaded_model.predict_classes(all_frag)
    pre = "/Users/weilu/Research/server/jan_2019/iterative_optimization_another_set/fragment_memory/"
    header = '''\
[Target]
query
[Memories]
'''
    with open(pre+f"{pdb}.mem", "w") as out:
        out.write(header)
        for index, i in enumerate(predicted_cluster):
            out.write(f"fraglib/{i}.gro {index+1} 1 9 20\n")
In [222]:
def getFrags(pdb, toLocation, top_n=1, evenWeight=True):
    # Same sliding-window scheme, but keep the top_n most probable clusters per window.
    # With evenWeight=True the total weight of 20 is split evenly; otherwise the
    # renormalized predicted probabilities are used as weights.
    seqFile = f"/Users/weilu/Research/server/jan_2019/iterative_optimization_another_set/all_simulations/{pdb}/{pdb}/{pdb}.seq"
    with open(seqFile) as f:
        lines = f.readlines()
    a = lines[0].strip()
    all_frag = []
    for i in range(0, len(a)-8):
        frag = a[i:i+9]
        fragIndex = [one_to_index(x) for x in frag]  # one-letter code -> integer index
        all_frag.append(fragIndex)
    all_frag = np.array(all_frag)
    predict_prob = loaded_model.predict(all_frag)
    clusters = np.argsort(-predict_prob)[:, :top_n]      # top_n cluster indices per window
    n = predict_prob.shape[0]
    prob = predict_prob[np.arange(n).reshape(n, 1), clusters]
    prob /= (prob.sum(axis=1)).reshape(n, 1)             # renormalize over the kept clusters
    # pre = "/Users/weilu/Research/server/jan_2019/iterative_optimization_another_set/fragment_memory/"
    pre = toLocation
    header = '''\
[Target]
query
[Memories]
'''
    with open(pre+f"{pdb}.mem", "w") as out:
        out.write(header)
        if evenWeight:
            weight = 20/top_n
        for index, c in enumerate(clusters):
            for index2, i in enumerate(c):
                if not evenWeight:
                    weight = prob[index][index2]
                out.write(f"fraglib/{i}.gro {index+1} 1 9 {weight:.3}\n")
In [4]:
# Load the trained model: architecture from JSON, weights from HDF5.
with open("/Users/weilu/Research/optimization/fragment/lstm100/model.json") as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("/Users/weilu/Research/optimization/fragment/lstm100/model.h5")
In [7]:
pre = "/Users/weilu/Research/optimization/fragment/lstm100/"
x_train = np.load(pre+"trainX.npy")
y_train = np.load(pre+"trainY.npy")
x_test = np.load(pre+"testX.npy")
y_test = np.load(pre+"testY.npy")
In [97]:
loaded_model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])
In [137]:
loaded_model.evaluate(x_test, y_test)
Out[137]:
In [118]:
loaded_model.predict_classes(x_test[:10])
Out[118]:
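`predict_classes` is only defined on `Sequential` models and is not available in newer Keras releases; the same result can be obtained from the predicted probabilities:
In [ ]:
# Equivalent to predict_classes: take the argmax over the class axis.
np.argmax(loaded_model.predict(x_test[:10]), axis=-1)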
In [179]:
predict_prob = loaded_model.predict(x_test[:10])
clusters = np.argsort(-predict_prob)[:, :top_n]  # top_n was set earlier in the session
loaded_model.evaluate(x_test[:10], y_test[:10])
Out[179]:
In [185]:
np.arange(10).T
Out[185]:
In [182]:
clusters
Out[182]:
In [135]:
def top_3_accuracy(y_true, y_pred):
    return top_k_categorical_accuracy(y_true, y_pred, k=3)
def top_5_accuracy(y_true, y_pred):
    return top_k_categorical_accuracy(y_true, y_pred, k=5)
def top_10_accuracy(y_true, y_pred):
    return top_k_categorical_accuracy(y_true, y_pred, k=10)
def top_20_accuracy(y_true, y_pred):
    return top_k_categorical_accuracy(y_true, y_pred, k=20)
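As a sanity check, top-k accuracy can also be computed directly with numpy (a minimal sketch, assuming y_test is one-hot encoded, as to_categorical would produce):
In [ ]:
# Plain-numpy top-k accuracy on the test set, as a cross-check of the Keras metrics.
probs = loaded_model.predict(x_test)
true_class = np.argmax(y_test, axis=1)            # y_test assumed one-hot
for k in [1, 3, 5, 10, 20]:
    topk = np.argsort(-probs, axis=1)[:, :k]      # k most probable clusters per example
    acc = np.mean([true_class[i] in topk[i] for i in range(len(true_class))])
    print(f"top-{k} accuracy: {acc:.3f}")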
In [136]:
loaded_model.compile('adam', 'categorical_crossentropy',
                     metrics=['accuracy', top_3_accuracy, top_5_accuracy,
                              top_10_accuracy, top_20_accuracy])
In [223]:
pdb_list = "1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", ")
pre = "/Users/weilu/Research/server/jan_2019/iterative_optimization_another_set/fragment_memory_top5_noeven/"
for p in pdb_list:
    name = p.lower()[:4]
    getFrags(name, pre, top_n=5, evenWeight=False)
In [59]:
seqFile = "/Users/weilu/Research/optimization/fragment/1ctf.seq"
with open(seqFile) as f:
    lines = f.readlines()
In [63]:
a = lines[0].strip()
In [69]:
a
Out[69]:
In [79]:
all_frag = []
for i in range(0, len(a)-8):
    frag = a[i:i+9]
    fragIndex = [one_to_index(x) for x in frag]
    all_frag.append(fragIndex)
all_frag = np.array(all_frag)
loaded_model.predict_classes(all_frag)
Out[79]:
In [85]:
predicted_cluster = loaded_model.predict_classes(all_frag)
In [88]:
pre = "/Users/weilu/Research/optimization/fragment/"
header = '''\
[Target]
query
[Memories]
'''
with open(pre+"frags.mem", "w") as out:
out.write(header)
for index, i in enumerate(predicted_cluster):
out.write(f"fraglib/{i}.gro {index+1} 1 9 20\n")
In [81]:
len(loaded_model.predict_classes(all_frag))
Out[81]:
In [21]:
y_train[2]
Out[21]:
In [17]:
np.argwhere(y_train[:,1] == 1)
Out[17]:
In [76]:
all_frag.shape
Out[76]:
In [77]:
x_train[0:2]
Out[77]:
In [38]:
loaded_model.predict(x_train[10001:10004, :]).round(2)
Out[38]:
In [39]:
loaded_model.predict_classes(x_train[10001:20000, :])
Out[39]:
In [43]:
plt.hist(loaded_model.predict_classes(x_train[:10000, :]), bins=50)
Out[43]:
In [48]:
df = pd.DataFrame({'data':loaded_model.predict_classes(x_train[:10000, :])})
print(df['data'].value_counts().head())
In [47]:
df = pd.DataFrame({'data':loaded_model.predict_classes(x_train[10001:20000, :])})
print(df['data'].value_counts().head())
In [55]:
np.all(loaded_model.predict(x_train[10001:20000, :]).round(2) == loaded_model.predict_proba(x_train[10001:20000, :]).round(2))
Out[55]:
In [57]:
loaded_model.predict(x_train[10002].reshape(1,9)).round(2)
Out[57]:
In [25]:
loaded_model.predict(x_train[10001].reshape(1,-1))
Out[25]:
In [13]:
loaded_model.predict(x_train[0].reshape(1,-1))
Out[13]:
In [6]:
loaded_model.summary()