In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
sys.path.insert(0,'/Users/weilu/Research/opt_server/')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
from small_script.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [2]:
plt.rcParams['figure.figsize'] = 0.5 * np.array([16.18033, 10]) #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})
In [3]:
figureFolder = "/Users/weilu/Dropbox/openAWSEM/figures/"
In [4]:
parser = PDBParser()
In [5]:
def get_cys_info(fileLocation):
s = parser.get_structure("X", fileLocation)
cys_bonds = []
for model, m in enumerate(s.get_models()):
all_res = list(m.get_residues())
n = len(all_res)
for i in range(n):
res1 = all_res[i]
if res1.resname != "CYS":
continue
for j in range(i+1, n):
res2 = all_res[j]
if res2.resname != "CYS":
continue
r = res1["CB"] - res2["CB"]
if r < 4.5:
cys_bonds.append([model, i, j, r])
# print(i, j, r)
a = pd.DataFrame(cys_bonds, columns=["model", "i", "j", "r"])
# a.to_csv("cys_bonds.csv")
return a
def correct_fraction(native_cys_bond, prediction_cys_bond):
# FN, TP
TP = FN = FP = 0
native_cys_bond["pair"] = native_cys_bond["i"].astype(str) + "_" + native_cys_bond["j"].astype(str)
native_pair_list = native_cys_bond["pair"].to_list()
prediction_cys_bond["pair"] = prediction_cys_bond["i"].astype(str) + "_" + prediction_cys_bond["j"].astype(str)
prediction_pair_list = prediction_cys_bond["pair"].to_list()
for native_pair in native_pair_list:
if native_pair in prediction_pair_list:
TP += 1
else:
FN += 1
# FP,
for prediction_pair in prediction_pair_list:
if prediction_pair not in native_pair_list:
FP += 1
return TP, FN, FP
def correct_fraction_v2(native_cys_bond, prediction_cys_bond):
TP, FN, FP = correct_fraction(native_cys_bond, prediction_cys_bond)
f = TP/(TP+FN+FP)
return f, TP, FN, FP
def correct_fraction_v3(prediction_cys_bond, native_cys_bond=None):
TP, FN, FP = correct_fraction(native_cys_bond, prediction_cys_bond)
f = TP/(TP+FN+FP)
return pd.DataFrame([[f, TP, FN, FP]], columns=["fraction", "TP", "FN", "FP"])
In [10]:
# data = pd.read_csv("/Users/weilu/Research/data/openMM/six_proteins_run2_single_timeStep2_lesser_frag_06-17.csv", index_col=0)
# # data = pd.read_csv("/Users/weilu/Research/data/openMM/six_proteins_run2_single_timeStep2_lesser_frag_06-16.csv", index_col=0)
# a = pd.read_csv("/Users/weilu/Dropbox/openAWSEM/data/length_info_jun10.csv", index_col=0)
# a = a.sort_values("Length").reset_index(drop=True)
# data = data.merge(a, on="Protein")
# # a = a.append({'Protein':'1ppb_H', 'Length':259}, ignore_index=True)
# # a = a.sort_values("Length").reset_index(drop=True)
# # a.to_csv("/Users/weilu/Dropbox/openAWSEM/data/length_info_jun10.csv")
# pdb_list = ["1ppb_H", "1lmm", "1tcg", "1hn4_A", "1fs3", "1bpi"]
# d_list =[]
# for pdb in pdb_list:
# t = pd.read_csv(f"/Users/weilu/Research/data/openMM/{pdb}_cys_06_17.csv")
# d_list.append(t)
# d_cys = pd.concat(d_list).reset_index(drop=True)
# d_cys["Steps"] = d_cys["model"] + 2
# data.Submode = data.Submode.astype(int)
# data = data.merge(d_cys, how='left', on=["Steps", "Protein", "Submode", "Run"])
# data["fraction"] = data["fraction"].fillna(0)
# data.to_csv("/Users/weilu/Research/data/openMM/disulfide_jun17.csv")
data = pd.read_csv("/Users/weilu/Research/data/openMM/disulfide_jun17.csv", index_col=0)
In [11]:
data["Protein"].unique()
Out[11]:
In [12]:
# data = data.query("Run < 10").reset_index(drop=True)
In [13]:
# scheme_dic = {"330":"k_0", "331":"k_1", "332":"k_2", "333":"k_5", "3300":"original"}
scheme_dic = {"330":"k=0", "331":"k=1", "332":"k=2", "333":"k=5", "3300":"Standard AWSEM"}
data["Submode"] = data["Submode"].astype(str)
data["Scheme"] = data["Submode"].apply(lambda x: scheme_dic[x])
selected = data.query("Steps > 2200 and Protein != '1hn4'").sort_values("Q").groupby(["Protein", "Submode", "Run"]).tail(1)
# a = a.query("Protein != '1hn4'").reset_index(drop=True)
# selected.Protein = pd.Categorical(selected.Protein, categories=a.Protein, ordered=True)
In [53]:
# selected.to_csv("/Users/weilu/Research/server/jun_week3_2020/disulfide_bond/selected_jun17.csv")
In [52]:
selected.query("Protein =='1lmm' and Submode=='332'")
Out[52]:
In [213]:
a = selected.groupby(["Protein", "Submode"])["fraction"].max().reset_index()
a["Scheme"] = a["Submode"].apply(lambda x: scheme_dic[x])
In [214]:
# a = a.sort_values(["Scheme"]).reset_index()
a.Scheme = pd.Categorical(a.Scheme, categories=['k=5', 'k=2', 'k=1', 'k=0', 'Standard AWSEM'], ordered=True)
a = a.sort_values(["Scheme"]).reset_index()
a.Protein = pd.Categorical(a.Protein, categories=['1tcg', '1lmm', '1bpi', '1fs3', '1ppb_H', '1hn4_A'], ordered=True)
ax = sns.lineplot("Protein", "fraction", hue="Scheme", style="Scheme", markers=True, data=a, dashes=True)
leg = ax.legend(loc = 'center left', bbox_to_anchor = (1.0, 0.5))
labels = [item.get_text() for item in ax.get_xticklabels()]
labels[0] = '1tcg\n(3)'
labels[1] = '1lmm\n(3)'
labels[2] = '1bpi\n(3)'
labels[3] = '1fs3\n(4)'
labels[4] = '1ppb\n(4)'
labels[5] = '1hn4\n(7)'
ax.set_xticklabels(labels)
plt.xlabel("Protein(number of disulfide bonds)")
plt.ylabel("Fraction")
plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/k_cys_fraction_v2.png", dpi=300, bbox_inches='tight')
In [ ]:
In [56]:
data["Submode"] = data["Submode"].astype(str)
data["Scheme"] = data["Submode"].apply(lambda x: scheme_dic[x])
selected = data.query("Steps > 2200 and Protein != '1hn4'").sort_values("Q").groupby(["Protein", "Submode"]).tail(1)
selected.Scheme = pd.Categorical(selected.Scheme, categories=['k=5', 'k=2', 'k=1', 'k=0', 'Standard AWSEM'], ordered=True)
selected = selected.sort_values(["Scheme"]).reset_index()
selected.Protein = pd.Categorical(selected.Protein, categories=['1tcg', '1lmm', '1bpi', '1fs3', '1hn4_A', '1ppb_H'], ordered=True)
In [15]:
pd.set_option('display.max_columns', 1000)
In [62]:
selected.query("Protein=='1fs3'")
Out[62]:
In [ ]:
panda.to_
In [222]:
ax = sns.lineplot("Protein", "Q", hue="Scheme", style="Scheme", markers=True, data=selected.sort_values(["Scheme", "Protein"]).reset_index(drop=True), dashes=False)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.ylabel("Best Q")
plt.xlabel("Protein(Length)")
labels = [item.get_text() for item in ax.get_xticklabels()]
labels[0] = '1tcg\n(22)'
labels[1] = '1lmm\n(40)'
labels[2] = '1bpi\n(58)'
labels[3] = '1fs3\n(124)'
labels[4] = '1hn4\n(129)'
labels[5] = '1ppb\n(259)'
ax.set_xticklabels(labels)
plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/k_cys_bestQ_v2.png", dpi=300, bbox_inches='tight')
In [21]:
filtered_data = data.query("Steps > 2200")
# filtered_data = filtered_data.query("k_cys != 'original'")
Q_max = filtered_data.sort_values('Q').groupby(["Protein", "Scheme", "Run"]).tail(1).sort_values(["Protein", 'Scheme', "Q"]).reset_index(drop=True)
Q_max["Annealing Index"] = Q_max.groupby(["Protein", "Scheme"])["Q"].rank(method="first", ascending=False).astype(int)
# Q_max.k_cys = pd.Categorical(Q_max.k_cys, ['k=5', 'k=2', 'k=1', 'k=0', 'Standard AWSEM'], ordered=True)
# Q_max["Scheme"] = Q_max["k_cys"]
Q_max.Scheme = pd.Categorical(Q_max.Scheme, categories=['k=5', 'k=2', 'k=1', 'k=0', 'Standard AWSEM'], ordered=True)
Q_max = Q_max.sort_values(["Scheme"]).reset_index()
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(1.5*16.18033, 2*10))
protein = '1tcg'
Q_max_one = Q_max.query(f"Protein == '{protein}'")
ax = sns.lineplot("Annealing Index", "Q", hue="Scheme", data=Q_max_one, legend=False,
markers=True, style="Scheme", dashes=False, ax=axes[0][0])
_ = ax.set_xticks(np.arange(1, 21, 1))
ax.set_ylabel("Best Q")
ax.set_title(f"{protein}")
protein = '1bpi'
Q_max_one = Q_max.query(f"Protein == '{protein}'")
ax = sns.lineplot("Annealing Index", "Q", hue="Scheme", data=Q_max_one, legend=False,
markers=True, style="Scheme", dashes=False, ax=axes[0][1])
_ = ax.set_xticks(np.arange(1, 21, 1))
ax.set_ylabel("Best Q")
ax.set_title(f"{protein}")
protein = '1lmm'
Q_max_one = Q_max.query(f"Protein == '{protein}'")
ax = sns.lineplot("Annealing Index", "Q", hue="Scheme", data=Q_max_one, legend=False,
markers=True, style="Scheme", dashes=False, ax=axes[1][0])
_ = ax.set_xticks(np.arange(1, 21, 1))
ax.set_ylabel("Best Q")
ax.set_title(f"{protein}")
protein = '1fs3'
Q_max_one = Q_max.query(f"Protein == '{protein}'")
ax = sns.lineplot("Annealing Index", "Q", hue="Scheme", data=Q_max_one, legend=False,
markers=True, style="Scheme", dashes=False, ax=axes[1][1])
_ = ax.set_xticks(np.arange(1, 21, 1))
ax.set_ylabel("Best Q")
ax.set_title(f"{protein}")
protein = '1hn4_A'
Q_max_one = Q_max.query(f"Protein == '{protein}'")
ax = sns.lineplot("Annealing Index", "Q", hue="Scheme", data=Q_max_one, legend=False,
markers=True, style="Scheme", dashes=False, ax=axes[2][0])
_ = ax.set_xticks(np.arange(1, 21, 1))
ax.set_ylabel("Best Q")
ax.set_title(f"1hn4")
protein = '1ppb_H'
Q_max_one = Q_max.query(f"Protein == '{protein}'")
ax = sns.lineplot("Annealing Index", "Q", hue="Scheme", data=Q_max_one, legend=False,
markers=True, style="Scheme", dashes=False, ax=axes[2][1])
_ = ax.set_xticks(np.arange(1, 21, 1))
ax.set_ylabel("Best Q")
ax.set_title(f"{protein}")
# fig.legend((['k_0', 'k_1', 'k_2', 'k_5', 'original']), loc='center right', bbox_to_anchor=(0.7, 0.15))
fig.legend((['k=5', 'k=2', 'k=1', 'k=0', 'Standard AWSEM']), bbox_to_anchor=(0.98, 0.5), loc=2, borderpad=1.5)
# fig.legend((['k_0', 'k_1', 'k_2', 'k_5', 'original']), loc="lower left", ncol=5)
# fig.legend((['k_0', 'k_1', 'k_2', 'k_5', 'original']), loc='lower center')
# axes[-1, -1].axis('off')
plt.tight_layout()
plt.savefig(f"{figureFolder}/seperate_run_best_include_1ppb_v2.png", dpi=300, bbox_inches='tight')
In [6]:
data_cys = pd.read_csv("/Users/weilu/Research/data/openMM/cys_all_run2_single_timeStep2_lesser_frag_06-17.csv", index_col=0)
In [7]:
native_pair = ['65_72', '40_95', '58_110', '26_84']
# group they by pair
t = data_cys.query("Protein=='1fs3' and Submode=='3300'").reset_index(drop=True)
t["pair"] = (1 + t["i"]).astype(str) + "_" + (1 + t["j"]).astype(str)
# tt = t.query("Run==5").reset_index(drop=True)
tt = t
pair_list = tt["pair"].unique().tolist()
pair_list = sorted(pair_list)
n_pair = len(pair_list)
pair_dict = {}
for i, pair in enumerate(pair_list):
pair_dict[pair] = i
pair_timeline = np.zeros((n_pair, 2500))
for i, line in tt.iterrows():
frame = line["model"]
pair = line["pair"]
if pair in native_pair:
pair_timeline[pair_dict[pair]][frame] += 1/20
else:
pair_timeline[pair_dict[pair]][frame] -= 1/20
In [8]:
pair_timeline_3300 = pair_timeline
In [37]:
f = plt.figure(figsize = (20,10))
ax = f.add_subplot(121)
ax.yaxis.tick_right()
plt.imshow(pair_timeline_3300, aspect='auto', cmap="bwr", vmin=-1, vmax=1)
_ = plt.yticks(ticks=range(n_pair), labels="")
# ax.set_yticklabels([])
# yaxis.tick_right()
plt.xlabel("Frames(Standard AWSEM)")
plt.ylabel("Formation of disulfide bonds")
ax = f.add_subplot(122)
ax.yaxis.tick_right()
plt.imshow(pair_timeline_333, aspect='auto', cmap="bwr", vmin=-1, vmax=1)
_ = plt.yticks(ticks=range(n_pair), labels=pair_list)
plt.xlabel("Frames(with new disulfide bond potential, k=5)")
f.subplots_adjust(wspace=0.1, hspace=0)
# plt.tight_layout
plt.savefig(f"{figureFolder}/1fs3_combined_average_trajectory.png", dpi=300, bbox_inches='tight')
In [9]:
f = plt.figure(figsize = (20,10))
ax = f.add_subplot(111)
ax.yaxis.tick_right()
plt.imshow(pair_timeline, aspect='auto', cmap="bwr", vmin=-1, vmax=1)
_ = plt.yticks(ticks=range(n_pair), labels=pair_list)
# yaxis.tick_right()
plt.xlabel("Frames")
plt.ylabel("Formation of disulfide bonds")
plt.savefig(f"{figureFolder}/1fs3_mode3300_average_trajectory.png", dpi=300, bbox_inches='tight')
In [18]:
native_pair = ['65_72', '40_95', '58_110', '26_84']
# group they by pair
t = data_cys.query("Protein=='1fs3' and Submode=='3300'").reset_index(drop=True)
t["pair"] = (1 + t["i"]).astype(str) + "_" + (1 + t["j"]).astype(str)
tt = t.query("Run==7").reset_index(drop=True)
# tt = t
pair_list = tt["pair"].unique().tolist()
pair_list = sorted(pair_list)
n_pair = len(pair_list)
pair_dict = {}
for i, pair in enumerate(pair_list):
pair_dict[pair] = i
pair_timeline = np.zeros((n_pair, 2500))
for i, line in tt.iterrows():
frame = line["model"]
pair = line["pair"]
if pair in native_pair:
pair_timeline[pair_dict[pair]][frame] += 1
else:
pair_timeline[pair_dict[pair]][frame] -= 1
In [19]:
f = plt.figure(figsize = (20,10))
ax = f.add_subplot(111)
ax.yaxis.tick_right()
plt.imshow(pair_timeline, aspect='auto', cmap="bwr", vmin=-1, vmax=1)
_ = plt.yticks(ticks=range(n_pair), labels=pair_list)
# yaxis.tick_right()
plt.xlabel("Frames")
plt.ylabel("Formation of disulfide bonds")
plt.savefig(f"{figureFolder}/1fs3_mode3300_run_7_trajectory.png", dpi=300, bbox_inches='tight')
In [16]:
native_pair = ['65_72', '40_95', '58_110', '26_84']
# group they by pair
t = data_cys.query("Protein=='1fs3' and Submode=='333'").reset_index(drop=True)
t["pair"] = (1 + t["i"]).astype(str) + "_" + (1 + t["j"]).astype(str)
# tt = t.query("Run==5").reset_index(drop=True)
tt = t
pair_list = tt["pair"].unique().tolist()
pair_list = sorted(pair_list)
n_pair = len(pair_list)
pair_dict = {}
for i, pair in enumerate(pair_list):
pair_dict[pair] = i
pair_timeline = np.zeros((n_pair, 2500))
for i, line in tt.iterrows():
frame = line["model"]
pair = line["pair"]
if pair in native_pair:
pair_timeline[pair_dict[pair]][frame] += 1/20
else:
pair_timeline[pair_dict[pair]][frame] -= 1/20
In [17]:
pair_timeline_333 = pair_timeline
In [21]:
f = plt.figure(figsize = (20,10))
ax = f.add_subplot(111)
ax.yaxis.tick_right()
plt.imshow(pair_timeline, aspect='auto', cmap="bwr", vmin=-1, vmax=1)
_ = plt.yticks(ticks=range(n_pair), labels=pair_list)
# yaxis.tick_right()
plt.xlabel("Frames")
plt.ylabel("Formation of disulfide bonds")
plt.savefig(f"{figureFolder}/1fs3_mode333_average_trajectory.png", dpi=300, bbox_inches='tight')
In [24]:
native_pair = ['65_72', '40_95', '58_110', '26_84']
# group they by pair
t = data_cys.query("Protein=='1fs3' and Submode=='333'").reset_index(drop=True)
t["pair"] = (1 + t["i"]).astype(str) + "_" + (1 + t["j"]).astype(str)
tt = t.query("Run==6").reset_index(drop=True)
# tt = t
pair_list = tt["pair"].unique().tolist()
pair_list = sorted(pair_list)
n_pair = len(pair_list)
pair_dict = {}
for i, pair in enumerate(pair_list):
pair_dict[pair] = i
pair_timeline = np.zeros((n_pair, 2500))
for i, line in tt.iterrows():
frame = line["model"]
pair = line["pair"]
if pair in native_pair:
pair_timeline[pair_dict[pair]][frame] += 1
else:
pair_timeline[pair_dict[pair]][frame] -= 1
In [25]:
f = plt.figure(figsize = (20,10))
ax = f.add_subplot(111)
ax.yaxis.tick_right()
plt.imshow(pair_timeline, aspect='auto', cmap="bwr", vmin=-1, vmax=1)
_ = plt.yticks(ticks=range(n_pair), labels=pair_list)
# yaxis.tick_right()
plt.xlabel("Frames")
plt.ylabel("Formation of disulfide bonds")
plt.savefig(f"{figureFolder}/1fs3_mode333_run_6_trajectory.png", dpi=300, bbox_inches='tight')
In [42]:
native_pair = ['65_72', '40_95', '58_110', '26_84']
# group they by pair
t = data_cys.query("Protein=='1fs3' and Submode=='333'").reset_index(drop=True)
t["pair"] = (1 + t["i"]).astype(str) + "_" + (1 + t["j"]).astype(str)
# tt = t.query("Run==5").reset_index(drop=True)
tt = t
pair_list = tt["pair"].unique().tolist()
pair_list = sorted(pair_list)
n_pair = len(pair_list)
pair_dict = {}
for i, pair in enumerate(pair_list):
pair_dict[pair] = i
pair_timeline = np.zeros((n_pair, 2500))
for i, line in tt.iterrows():
frame = line["model"]
pair = line["pair"]
if pair in native_pair:
pair_timeline[pair_dict[pair]][frame] += 1/20
else:
pair_timeline[pair_dict[pair]][frame] += 1/20
In [54]:
f = plt.figure(figsize = (20,10))
ax = f.add_subplot(111)
ax.yaxis.tick_right()
plt.imshow(pair_timeline, aspect='auto', cmap="Greys")
_ = plt.yticks(ticks=range(n_pair), labels=pair_list)
# yaxis.tick_right()
plt.xlabel("Frames")
plt.ylabel("Formation of disulfide bonds")
for ytick, pair in zip(ax.get_yticklabels(), pair_list):
if pair in native_pair:
ytick.set_color("red")
else:
ytick.set_color("black")
# plt.colorbar()
# plt.savefig(f"{figureFolder}/1fs3_run3_mode_333_trajectory.png", dpi=300, bbox_inches='tight')
In [65]:
native_pair = ['65_72', '40_95', '58_110', '26_84']
# group they by pair
t = data_cys.query("Protein=='1fs3' and Submode=='333'").reset_index(drop=True)
t["pair"] = (1 + t["i"]).astype(str) + "_" + (1 + t["j"]).astype(str)
tt = t.query("Run==6").reset_index(drop=True)
# tt = t
pair_list = tt["pair"].unique().tolist()
pair_list = sorted(pair_list)
n_pair = len(pair_list)
pair_dict = {}
for i, pair in enumerate(pair_list):
pair_dict[pair] = i
pair_timeline = np.zeros((n_pair, 2500))
for i, line in tt.iterrows():
frame = line["model"]
pair = line["pair"]
if pair in native_pair:
pair_timeline[pair_dict[pair]][frame] = 1
else:
pair_timeline[pair_dict[pair]][frame] = 2
In [66]:
f = plt.figure(figsize = (20,10))
ax = f.add_subplot(111)
ax.yaxis.tick_right()
plt.imshow(pair_timeline, aspect='auto', cmap="hot_r")
_ = plt.yticks(ticks=range(n_pair), labels=pair_list)
# yaxis.tick_right()
plt.xlabel("Frames")
plt.ylabel("Formation of disulfide bonds")
# plt.savefig(f"{figureFolder}/1fs3_run3_mode_33_trajectory.png", dpi=300, bbox_inches='tight')
Out[66]:
In [ ]:
In [ ]:
In [50]:
a = pd.DataFrame(range(1, 2501), columns=["Steps"])
t = d_cys.query("Run==3 and Protein=='1fs3' and Submode=='333'").reset_index(drop=True)
t["Steps"] = t["model"] + 1
b = a.merge(t,how="left").fillna(0)
plt.plot(b["fraction"])
plt.xlabel("Frame")
plt.ylabel("Fraction")
plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/fraction_trajectory_run3_1fs3_333.png", dpi=300, bbox_inches='tight')
In [ ]:
In [63]:
f = plt.figure(figsize = (20,10))
ax = f.add_subplot(111)
ax.yaxis.tick_right()
plt.imshow(pair_timeline, aspect='auto', cmap="hot_r")
_ = plt.yticks(ticks=range(n_pair), labels=pair_list)
# yaxis.tick_right()
plt.xlabel("Frames")
plt.ylabel("Formation of disulfide bonds")
# plt.colorbar()
# plt.savefig(f"{figureFolder}/1fs3_run3_mode_33_trajectory.png", dpi=300, bbox_inches='tight')
Out[63]:
In [51]:
f = plt.figure(figsize = (20,10))
ax = f.add_subplot(111)
ax.yaxis.tick_right()
plt.imshow(pair_timeline, aspect='auto', cmap="hot_r")
_ = plt.yticks(ticks=range(n_pair), labels=pair_list)
# yaxis.tick_right()
plt.xlabel("Frames")
plt.ylabel("Formation of disulfide bonds")
# plt.colorbar()
# plt.savefig(f"{figureFolder}/1fs3_run3_mode_33_trajectory.png", dpi=300, bbox_inches='tight')
Out[51]:
In [ ]:
In [477]:
f = plt.figure(figsize = (20,10))
ax = f.add_subplot(111)
ax.yaxis.tick_right()
plt.imshow(pair_timeline, aspect='auto', cmap="hot_r")
_ = plt.yticks(ticks=range(n_pair), labels=pair_list)
# yaxis.tick_right()
plt.xlabel("Frames")
plt.ylabel("Formation of disulfide bonds")
plt.savefig(f"{figureFolder}/1fs3_run3_mode_33_trajectory.png", dpi=300, bbox_inches='tight')
In [455]:
f = plt.figure(figsize = (20,10))
ax = f.add_subplot(111)
ax.yaxis.tick_right()
plt.imshow(pair_timeline, aspect='auto')
_ = plt.yticks(ticks=range(n_pair), labels=pair_list)
# yaxis.tick_right()
plt.savefig(f"{figureFolder}/1fs3_run3_mode_33_trajectory.png", dpi=300, bbox_inches='tight')
In [350]:
t.query("Run==3").groupby("pair").count().sort_values("r")
Out[350]:
In [368]:
t.query("model > 2400 and Run==3").groupby("pair").count().sort_values("r")
Out[368]:
In [353]:
t.query("model > 2000 and Run==3").groupby("pair").count().sort_values("r")
Out[353]:
In [389]:
a = data_cys.query("model > 2000").groupby(["Run", "Protein", "Submode", "model", "j"])["r"].count().reset_index().sort_values("r")
b = a.query("r==3").groupby(["Protein", "Submode"])["r"].count().reset_index()
b
Out[389]:
In [398]:
a = data_cys.query("model > 2000 and r < 4").groupby(["Run", "Protein", "Submode", "model", "j"])["r"].count().reset_index().sort_values("r")
b = a.query("r==2").groupby(["Protein", "Submode"])["r"].count().reset_index()
b
Out[398]:
In [405]:
a = data_cys.query("model > 2000 and i==25 and j==83")
In [407]:
a.groupby("Submode").count()
Out[407]:
In [281]:
t = data_cys.query("Protein=='1fs3' and Submode=='33' and Run==3")
In [195]:
tt = t.query("model==2414").reset_index(drop=True)
In [256]:
protein = "1fs3"
fileLocation = f"/Users/weilu/Research/server/may_week2_2020/disulfide_bond/setups/{protein}/{protein}.pdb"
native_cys_bond = get_cys_info(fileLocation)
correct_fraction_v2(native_cys_bond, tt)
Out[256]:
In [224]:
pd.DataFrame([[f, TP, FN, FP]], columns=["f", "TP", "FN", "FP"])
Out[224]:
In [226]:
a = t.groupby(["model", "Run", "Protein", "Submode"]).apply(correct_fraction_v3, native_cys_bond)
In [282]:
tt = t.query("model==3").reset_index(drop=True)
In [283]:
correct_fraction_v2(native_cys_bond, tt)
Out[283]:
In [284]:
correct_fraction(native_cys_bond, tt)
Out[284]:
In [308]:
native_cys_bond
Out[308]:
In [309]:
correct_fraction_v3(tt, native_cys_bond)
Out[309]:
In [304]:
In [317]:
data_selected = data_cys.query(f"Protein=='{protein}' and Submode=='33' and Run==3").head(4)
data_selected.groupby(["model", "Run", "Protein", "Submode"]).apply(correct_fraction_v3, native_cys_bond=native_cys_bond).reset_index()
Out[317]:
In [311]:
# from functools import partial
# correct_fraction_v3_one_protein = partial(correct_fraction_v3, native_cys_bond=native_cys_bond)
In [318]:
pdb_list = ["1ppb", "1lmm", "1tcg", "1hn4_A", "1fs3", "1bpi"]
# pdb_list = ["1ppb", "1lmm", "1tcg", "1hn4_A", "1bpi"]
for pdb in pdb_list:
print(pdb)
protein = pdb
fileLocation = f"/Users/weilu/Research/server/may_week2_2020/disulfide_bond/setups/{protein}/{protein}.pdb"
native_cys_bond = get_cys_info(fileLocation)
data_selected = data_cys.query(f"Protein=='{protein}'")
result = data_selected.groupby(["model", "Run", "Protein", "Submode"]).apply(correct_fraction_v3, native_cys_bond=native_cys_bond).reset_index()
result.to_csv(f"/Users/weilu/Research/data/openMM/{protein}_cys_05-22.csv")
In [319]:
pdb_list = ["1ppb", "1lmm", "1tcg", "1hn4_A", "1fs3", "1bpi"]
d_list =[]
for pdb in pdb_list:
t = pd.read_csv(f"/Users/weilu/Research/data/openMM/{pdb}_cys_05-22.csv")
d_list.append(t)
d_cys = pd.concat(d_list).reset_index(drop=True)
In [320]:
In [321]:
d_cys.query("Protein=='1fs3' and Submode=='33' and Run==3")
Out[321]:
In [322]:
d_cys.query("Protein=='1fs3' and Submode=='33' and Run==3")
Out[322]:
In [174]:
In [249]:
scheme_dic = {"30":"k_0", "31":"k_1", "32":"k_2", "33":"k_5", "300":"original"}
data["Submode"] = data["Submode"].astype(str)
data["Scheme"] = data["Submode"].apply(lambda x: scheme_dic[x])
selected = data.query("Steps > 1500 and Protein != '1ppb_H' and Protein != '1hn4'").sort_values("Q").groupby(["Protein", "Submode", "Run"]).tail(1)
In [252]:
selected.query("Protein=='1fs3' and Submode=='300'")
Out[252]:
In [ ]:
In [329]:
d_cys.query("Protein=='1fs3' and Submode=='33'").groupby("Run")[["TP", "FN", "FP", "fraction"]].sum()
Out[329]:
In [330]:
d_cys.query("model > 2000 and Protein=='1fs3' and Submode=='300'").sort_values("FP")
Out[330]:
In [332]:
d_cys.query("Protein=='1fs3' and Submode=='300'").sort_values("FP")
Out[332]:
In [400]:
a = pd.DataFrame(range(1, 2501), columns=["Steps"])
t = d_cys.query("Run==3 and Protein=='1fs3' and Submode=='33'").reset_index(drop=True)
t["Steps"] = t["model"] + 1
b = a.merge(t,how="left").fillna(0)
plt.plot(b["fraction"])
plt.xlabel("Frame")
plt.ylabel("Fraction")
plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/fraction_trajectory_run3_1fs3_33.png", dpi=300, bbox_inches='tight')
In [237]:
t = d_cys_2.query("Run==3 and Protein=='1fs3' and Submode=='33'").reset_index(drop=True)
plt.plot(t["fraction"])
Out[237]:
In [242]:
t = d_cys_2.query("Run==12 and Protein=='1fs3' and Submode=='33'").reset_index(drop=True)
plt.plot(t["fraction"])
Out[242]:
In [ ]:
# read cys bond information.
selected = data.query("Steps > 1500").sort_values("Q").groupby(["Protein", "Submode"]).tail(1)
# selected = pd.read_csv("/Users/weilu/Research/server/may_week2_2020/disulfide_bond/selected.csv")
fraction = []
for i, line in selected.iterrows():
run = line["Run"]
protein = line["Protein"]
frame = line["Steps"] - 2
submode = line["Submode"]
print(run, protein, frame, submode)
fileLocation = f"/Users/weilu/Research/server/may_week2_2020/disulfide_bond/chosen_frames/{protein}_{submode}.pdb"
prediction_cys_bond = get_cys_info(fileLocation)
fileLocation = f"/Users/weilu/Research/server/may_week2_2020/disulfide_bond/setups/{protein}/{protein}.pdb"
native_cys_bond = get_cys_info(fileLocation)
TP, FN, FP = correct_fraction(native_cys_bond, prediction_cys_bond)
fraction.append([TP, FN, FP])
selected = selected.reset_index(drop=True)
fraction_ = pd.DataFrame(fraction, columns=["TP", "FN", "FP"])
selected = pd.concat([selected, fraction_], axis=1)
selected["fraction"] = selected["TP"] / (selected["TP"] + selected["FN"] + selected["FP"])
In [ ]:
In [164]:
data.query("model > 2000 and Protein=='1fs3'").sort_values("FP")
Out[164]:
In [ ]:
In [142]:
data.query("model > 2000").sort_values("FP").query("FP == 7")
Out[142]:
In [150]:
In [ ]:
In [ ]:
In [ ]:
scheme_dic = {"30":"k_0", "31":"k_1", "32":"k_2", "33":"k_5", "300":"original"}
data["Submode"] = data["Submode"].astype(str)
data["Scheme"] = data["Submode"].apply(lambda x: scheme_dic[x])
data["k_cys"] = data["Scheme"]
In [142]:
# selected = pd.read_csv("/Users/weilu/Research/server/may_week2_2020/disulfide_bond/selected.csv")
fraction = []
for i, line in selected.iterrows():
run = line["Run"]
protein = line["Protein"]
frame = line["Steps"] - 2
submode = line["Submode"]
print(run, protein, frame, submode)
fileLocation = f"/Users/weilu/Research/server/may_week2_2020/disulfide_bond/chosen_frames/{protein}_{submode}.pdb"
prediction_cys_bond = get_cys_info(fileLocation)
fileLocation = f"/Users/weilu/Research/server/may_week2_2020/disulfide_bond/setups/{protein}/{protein}.pdb"
native_cys_bond = get_cys_info(fileLocation)
TP, FN, FP = correct_fraction(native_cys_bond, prediction_cys_bond)
fraction.append([TP, FN, FP])
In [157]:
# pdb_list = ["1ppb", "1lmm", "1tcg", "1hn4_A", "1fs3", "1bpi"]
# # pdb_list = ["1ppb", "1lmm", "1tcg", "1hn4_A", "1bpi"]
# for pdb in pdb_list:
# print(pdb)
# protein = pdb
# fileLocation = f"/Users/weilu/Research/server/may_week2_2020/disulfide_bond/setups/{protein}/{protein}.pdb"
# native_cys_bond = get_cys_info(fileLocation)
# data_selected = data.query(f"Protein=='{protein}'")
# result = data_selected[["model", "Run", "Protein", "Submode"]].groupby(["model", "Run", "Protein", "Submode"]).head(1).reset_index(drop=True)
# result['fraction'], result['TP'], result['FN'], result['FP'] = zip(*data_selected.groupby(["model", "Run", "Protein", "Submode"]).apply(correct_fraction_v2, native_cys_bond).reset_index(drop=True))
# result.to_csv(f"/Users/weilu/Research/data/openMM/{protein}_cys_05-21.csv")
# pdb_list = ["1ppb", "1lmm", "1tcg", "1hn4_A", "1fs3", "1bpi"]
# d_list =[]
# for pdb in pdb_list:
# t = pd.read_csv(f"/Users/weilu/Research/data/openMM/{pdb}_cys_05-21.csv")
# d_list.append(t)
# data = pd.concat(d_list).reset_index(drop=True)
# d_cys = data
# protein = "1fs3"
# fileLocation = f"/Users/weilu/Research/server/may_week2_2020/disulfide_bond/setups/{protein}/{protein}.pdb"
# native_cys_bond = get_cys_info(fileLocation)
# data_selected = data.query(f"Protein=='{protein}'")
# result = data_selected[["model", "Run", "Protein", "Submode"]].groupby(["model", "Run", "Protein", "Submode"]).head(1).reset_index(drop=True)
# result['fraction'], result['TP'], result['FN'], result['FP'] = zip(*data_selected.groupby(["model", "Run", "Protein", "Submode"]).apply(correct_fraction_v2, native_cys_bond).reset_index(drop=True))
# result.to_csv(f"/Users/weilu/Research/data/openMM/{protein}_cys_05-21.csv")
In [159]:
# selected.to_csv("/Users/weilu/Research/data/openMM/six_proteins_run1_ho_05-20.csv")
In [151]:
t = data.query("Run==9 and Protein=='1hn4_A' and Submode=='300'").reset_index(drop=True)
plt.plot(t["fraction"])
Out[151]:
In [153]:
t.head(20)
Out[153]:
In [ ]:
In [114]:
data.head(10)["Protein"].unique()[0]
Out[114]:
In [ ]:
result
In [116]:
result = t[["model", "Run", "Protein", "Submode"]].groupby("model").head(1).reset_index(drop=True)
result['fraction'], result['TP'], result['FN'], result['FP'] = zip(*t.groupby("model").apply(correct_fraction_v2, native_cys_bond).reset_index(drop=True))
In [117]:
result
Out[117]:
In [52]:
f, TP, FN, FP = correct_fraction_v2(native_cys_bond, tt)
In [100]:
t.groupby("model").apply(correct_fraction_v2, native_cys_bond).reset_index(drop=True)
Out[100]:
In [ ]:
result.apply()
In [76]:
result = t.groupby("model").apply(correct_fraction_v2, native_cys_bond).reset_index(drop=True)
In [98]:
result
Out[98]:
In [101]:
plt.plot(result["TP"])
Out[101]:
In [99]:
plt.plot(result["fraction"])
Out[99]:
In [ ]:
In [64]:
result = result.to_frame(name="result")
In [106]:
data
Out[106]:
In [49]:
tt
Out[49]:
In [320]:
selected = pd.read_csv("/Users/weilu/Research/data/openMM/six_proteins_run1_ho_05-20.csv", index_col=0)
In [174]:
selected = selected.reset_index(drop=True)
fraction_ = pd.DataFrame(fraction, columns=["TP", "FN", "FP"])
selected = pd.concat([selected, fraction_], axis=1)
selected["fraction"] = selected["TP"] / (selected["TP"] + selected["FN"] + selected["FP"])
# selected["Submode"] = selected["Submode"].astype(str)
# # selected["Scheme"] = "mode" + selected["Submode"]
# selected["Scheme"] = selected["Submode"].apply(lambda x: scheme_dic[x])
ax = sns.lineplot("Protein", "fraction", hue="Scheme", style="Scheme", markers=True, data=selected, dashes=True)
# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
leg = ax.legend(loc = 'center left', bbox_to_anchor = (1.0, 0.5))
# plt.tight_layout()
plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/k_cys_fraction.png", dpi=300, bbox_inches='tight')
In [ ]:
In [ ]:
In [43]:
s = parser.get_structure("X", "/Users/weilu/Research/server/may_week2_2020/disulfide_bond/setups/1lmm/1lmm.pdb")
m = s[0]
all_res = list(m.get_residues())
In [52]:
n = len(all_res)
native_cys_bond = []
for i in range(n):
res1 = all_res[i]
if res1.resname != "CYS":
continue
for j in range(i+1, n):
res2 = all_res[j]
if res2.resname != "CYS":
continue
r = res1["CB"] - res2["CB"]
# print(i, j, r)
if r < 4.5:
# print(i, j, r)
native_cys_bond.append([i, j, r])
native_cys_bond = pd.DataFrame(native_cys_bond, columns=["i", "j", "r"])
In [53]:
native_cys_bond
Out[53]:
In [55]:
s = parser.get_structure("X", "/Users/weilu/Research/server/may_week2_2020/disulfide_bond/run1_ho/1lmm/33_0/lastFrame.pdb")
m = s[0]
all_res = list(m.get_residues())
In [56]:
n = len(all_res)
for i in range(n):
res1 = all_res[i]
if res1.resname != "CYS":
continue
for j in range(i+1, n):
res2 = all_res[j]
if res2.resname != "CYS":
continue
r = res1["CB"] - res2["CB"]
if r < 4.5:
print(i, j, r)
In [98]:
native_cys_bond
Out[98]:
In [80]:
# pdb_list = ["1tt8", "6btc", "6g57", "6q64", "pex22", "sos", "unk2", "6jdq", "5uak"]
pdb_list = ["1hn4_A", "1ppb_H", "1lmm", "1tcg", "1fs3", "1bpi"]
subMode_list = []
# subMode_list += [100, 101, 102, 103, 104, 105, 106]
# subMode_list += [110, 111, 112, 113, 114, 115, 116]
# subMode_list += [120, 121, 122, 123, 124, 125, 126]
# subMode_list += [0, 1, 2, 3, 4, 5]
# subMode_list += [30, 31, 32, 33, 34, 35]
# subMode_list += [1300, 130, 131, 132, 133]
subMode_list += [3300, 330, 331, 332, 333]
# subMode_list += [130, 131, 132, 133, 134, 135]
simulationType = "six_proteins"
subFolder = "run2_single_timeStep2_lesser_frag"
pre_base = "/Users/weilu/Research/server/jun_week1_2020//disulfide_bond/"
info = []
all_data = []
for pdb in pdb_list:
for submode in subMode_list:
for i in range(20):
pre = f"{pre_base}/{subFolder}/{pdb}/{submode}_{i}"
location = f"{pre}/info.dat"
try:
tmp = pd.read_csv(location, sep="\s+")
tmp = tmp.assign(Run=i, Protein=pdb, Submode=submode)
all_data.append(tmp)
except Exception as e:
print(pdb, i, submode, location)
print(e)
pass
data = pd.concat(all_data, sort=False)
# info = pd.DataFrame(info)
today = datetime.today().strftime('%m-%d')
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{subFolder}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)
In [28]:
# pdb_list = ["1tt8", "6btc", "6g57", "6q64", "pex22", "sos", "unk2", "6jdq", "5uak"]
pdb_list = ["1hn4_A", "1ppb_H", "1lmm", "1tcg", "1fs3", "1bpi"]
subMode_list = []
# subMode_list += [100, 101, 102, 103, 104, 105, 106]
# subMode_list += [110, 111, 112, 113, 114, 115, 116]
# subMode_list += [120, 121, 122, 123, 124, 125, 126]
# subMode_list += [0, 1, 2, 3, 4, 5]
# subMode_list += [30, 31, 32, 33, 34, 35]
# subMode_list += [1300, 130, 131, 132, 133]
subMode_list += [2300, 230, 231, 232, 233]
# subMode_list += [130, 131, 132, 133, 134, 135]
simulationType = "six_proteins"
subFolder = "run2_single_timeStep2_less_frag"
pre_base = "/Users/weilu/Research/server/jun_week1_2020//disulfide_bond/"
info = []
all_data = []
for pdb in pdb_list:
for submode in subMode_list:
for i in range(20):
pre = f"{pre_base}/{subFolder}/{pdb}/{submode}_{i}"
location = f"{pre}/info.dat"
try:
tmp = pd.read_csv(location, sep="\s+")
tmp = tmp.assign(Run=i, Protein=pdb, Submode=submode)
all_data.append(tmp)
except Exception as e:
print(pdb, i, submode, location)
print(e)
pass
data = pd.concat(all_data, sort=False)
# info = pd.DataFrame(info)
today = datetime.today().strftime('%m-%d')
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{subFolder}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)
In [3]:
# pdb_list = ["1tt8", "6btc", "6g57", "6q64", "pex22", "sos", "unk2", "6jdq", "5uak"]
pdb_list = ["1hn4_A", "1ppb", "1lmm", "1tcg", "1fs3", "1bpi"]
subMode_list = []
# subMode_list += [100, 101, 102, 103, 104, 105, 106]
# subMode_list += [110, 111, 112, 113, 114, 115, 116]
# subMode_list += [120, 121, 122, 123, 124, 125, 126]
# subMode_list += [0, 1, 2, 3, 4, 5]
# subMode_list += [30, 31, 32, 33, 34, 35]
subMode_list += [1300, 130, 131, 132, 133]
# subMode_list += [130, 131, 132, 133, 134, 135]
simulationType = "six_proteins"
subFolder = "run2_single_timeStep2"
pre_base = "/Users/weilu/Research/server/jun_week1_2020//disulfide_bond/"
info = []
all_data = []
for pdb in pdb_list:
for submode in subMode_list:
for i in range(20):
pre = f"{pre_base}/{subFolder}/{pdb}/{submode}_{i}"
location = f"{pre}/info.dat"
try:
tmp = pd.read_csv(location, sep="\s+")
tmp = tmp.assign(Run=i, Protein=pdb, Submode=submode)
all_data.append(tmp)
except Exception as e:
print(pdb, i, submode, location)
print(e)
pass
data = pd.concat(all_data, sort=False)
# info = pd.DataFrame(info)
today = datetime.today().strftime('%m-%d')
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{subFolder}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)
In [ ]:
pdb_list = ["1ppb", "1lmm", "1tcg", "1hn4_A", "1fs3", "1bpi"]
# pdb_list = ["1ppb", "1lmm", "1tcg", "1hn4_A", "1bpi"]
for pdb in pdb_list:
print(pdb)
protein = pdb
fileLocation = f"/Users/weilu/Research/server/jun_week1_2020//disulfide_bond/setups/{protein}/{protein}.pdb"
native_cys_bond = get_cys_info(fileLocation)
data_selected = data_cys.query(f"Protein=='{protein}'")
result = data_selected.groupby(["model", "Run", "Protein", "Submode"]).apply(correct_fraction_v3, native_cys_bond=native_cys_bond).reset_index()
result.to_csv(f"/Users/weilu/Research/data/openMM/{protein}_cys_06-06.csv")
In [19]:
# pdb_list = ["1tt8", "6btc", "6g57", "6q64", "pex22", "sos", "unk2", "6jdq", "5uak"]
pdb_list = ["1hn4_A", "1ppb_H", "1ppb", "1lmm", "1tcg", "1hn4", "1fs3", "1bpi"]
subMode_list = []
# subMode_list += [100, 101, 102, 103, 104, 105, 106]
# subMode_list += [110, 111, 112, 113, 114, 115, 116]
# subMode_list += [120, 121, 122, 123, 124, 125, 126]
# subMode_list += [0, 1, 2, 3, 4, 5]
# subMode_list += [30, 31, 32, 33, 34, 35]
subMode_list += [300, 30, 31, 32, 33]
# subMode_list += [130, 131, 132, 133, 134, 135]
simulationType = "six_proteins"
subFolder = "run1_ho"
pre_base = "/Users/weilu/Research/server/may_week2_2020/disulfide_bond/"
info = []
all_data = []
for pdb in pdb_list:
for submode in subMode_list:
for i in range(20):
pre = f"{pre_base}/{subFolder}/{pdb}/{submode}_{i}"
location = f"{pre}/info.dat"
try:
tmp = pd.read_csv(location, sep="\s+")
tmp = tmp.assign(Run=i, Protein=pdb, Submode=submode)
all_data.append(tmp)
except Exception as e:
print(pdb, i, submode, location)
print(e)
pass
data = pd.concat(all_data, sort=False)
# info = pd.DataFrame(info)
today = datetime.today().strftime('%m-%d')
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{subFolder}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)
In [ ]:
a = pd.read_csv("/Users/weilu/Research/data/openMM/length_info_12-01.csv", index_col=0)
data = data.merge(a, on="Protein")
In [17]:
filtered_data = data.query("Steps > 1500 and Protein == '1lmm'")
Q_max = filtered_data.sort_values('Q').groupby(["Submode", "Run"]).tail(1).sort_values(['Submode', "Q"])
Q_max["Annealing Index"] = Q_max.groupby(["Submode"])["Q"].rank(method="first", ascending=False).astype(int)
sns.lineplot("Annealing Index", "Q", hue="Submode", markers=True, data=Q_max, style="Submode", dashes=False)
Out[17]:
In [ ]:
In [88]:
selected = data.query("Steps > 1500").sort_values("Q").groupby(["Protein", "Submode"]).tail(1)
selected["Submode"] = selected["Submode"].astype(str)
selected["Scheme"] = "mode" + selected["Submode"]
ax = sns.lineplot("Protein", "Q", hue="Scheme", markers=True, data=selected, dashes=False)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
Out[88]:
In [74]:
selected = data.query("Steps > 1500")
sns.boxenplot("Protein", "Q", hue="Submode", data=selected)
Out[74]:
In [ ]:
selected.to_csv("/Users/weilu/Research/server/may_week2_2020/disulfide_bond/selected.csv")
In [ ]:
memory = 'ho'
filtered_data = data.query(f"memory == '{memory}'").query("k_cys != 'k_10' and k_cys != 'k_20'")
# filtered_data = data.query(f"Protein == '{protein}'").query(f"memory == '{memory}'").query("k_cys != 'k_10' and k_cys != 'k_20'")
Q_max = filtered_data.sort_values('Q').groupby(["Submode", "Protein"]).tail(1).sort_values(['Submode', "Q"])
Q_max.Protein = pd.Categorical(Q_max.Protein, a.sort_values("Length").Protein.values)
Q_max.k_cys = pd.Categorical(Q_max.k_cys, ["k_0", "k_1", "k_2", "k_5"])
# Q_max["Annealing Index"] = Q_max.groupby(["Submode"])["Q"].rank(method="first", ascending=False).astype(int)
# last = data.query("Steps > 350 and Folder in ['eighth_2_cys']")
# sns.lineplot("Annealing Index", "Q", markers=True, style="k_cys", data=Q_max, hue="k_cys", dashes=False)
sns.lineplot("Protein", "Q", hue="k_cys", markers=True, data=Q_max.sort_values("Length").reset_index(drop=True), style="k_cys", dashes=False)
plt.ylabel("Best Q")
# _ = plt.xticks(np.arange(1, 21, 1))
plt.title(f"structure prediction results of 5 Disufide rich proteins")
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/k_cys_original_gamma.png", dpi=300)
In [28]:
a = np.load("/Users/weilu/Research/server/may_week3_2020/T1024/T1024.npz")
In [33]:
a["dist"].shape
Out[33]:
In [31]:
b = np.load("/Users/weilu/Research/server/may_week3_2020/T1024/dist.npz")
In [34]:
b["distspline"].shape
Out[34]:
In [ ]:
def get_frame(file="movie.pdb", to="last_frame.pdb", frame=-1):
# default is last frame.
# if you want first, please set frame to 1.
a = open(file).read().split("ENDMDL")
assert a[-1] == "\nEND\n"
with open(to, "w") as out:
out.write(a[frame-1])
In [ ]:
In [ ]:
In [ ]:
In [31]:
# read all cys information.
# pdb_list = ["1tt8", "6btc", "6g57", "6q64", "pex22", "sos", "unk2", "6jdq", "5uak"]
pdb_list = ["1hn4_A", "1ppb_H", "1lmm", "1tcg", "1fs3", "1bpi"]
subMode_list = []
# subMode_list += [100, 101, 102, 103, 104, 105, 106]
# subMode_list += [110, 111, 112, 113, 114, 115, 116]
# subMode_list += [120, 121, 122, 123, 124, 125, 126]
# subMode_list += [0, 1, 2, 3, 4, 5]
# subMode_list += [30, 31, 32, 33, 34, 35]
# subMode_list += [300, 30, 31, 32, 33]
subMode_list += [3300, 330, 331, 332, 333]
# subMode_list += [130, 131, 132, 133, 134, 135]
simulationType = "six_proteins"
subFolder = "run2_single_timeStep2_lesser_frag"
pre_base = "/Users/weilu/Research/server/jun_week1_2020/disulfide_bond/"
info = []
all_data = []
for pdb in pdb_list:
for submode in subMode_list:
for i in range(15):
pre = f"{pre_base}/{subFolder}/{pdb}/{submode}_{i}"
location = f"{pre}/cys_bonds.csv"
try:
tmp = pd.read_csv(location, index_col=0)
tmp = tmp.assign(Run=i, Protein=pdb, Submode=submode)
all_data.append(tmp)
except Exception as e:
print(pdb, i, submode, location)
print(e)
pass
data = pd.concat(all_data, sort=False)
# info = pd.DataFrame(info)
today = datetime.today().strftime('%m-%d')
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{subFolder}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)
In [79]:
# read all cys information.
# pdb_list = ["1tt8", "6btc", "6g57", "6q64", "pex22", "sos", "unk2", "6jdq", "5uak"]
pdb_list = ["1hn4_A", "1ppb_H", "1lmm", "1tcg", "1fs3", "1bpi"]
subMode_list = []
# subMode_list += [100, 101, 102, 103, 104, 105, 106]
# subMode_list += [110, 111, 112, 113, 114, 115, 116]
# subMode_list += [120, 121, 122, 123, 124, 125, 126]
# subMode_list += [0, 1, 2, 3, 4, 5]
# subMode_list += [30, 31, 32, 33, 34, 35]
# subMode_list += [300, 30, 31, 32, 33]
subMode_list += [3300, 330, 331, 332, 333]
# subMode_list += [130, 131, 132, 133, 134, 135]
simulationType = "cys_all"
subFolder = "run2_single_timeStep2_lesser_frag"
pre_base = "/Users/weilu/Research/server/jun_week1_2020/disulfide_bond/"
info = []
all_data = []
for pdb in pdb_list:
for submode in subMode_list:
for i in range(20):
pre = f"{pre_base}/{subFolder}/{pdb}/{submode}_{i}"
location = f"{pre}/cys_bonds.csv"
try:
tmp = pd.read_csv(location, index_col=0)
tmp = tmp.assign(Run=i, Protein=pdb, Submode=submode)
all_data.append(tmp)
except Exception as e:
print(pdb, i, submode, location)
print(e)
pass
data = pd.concat(all_data, sort=False)
# info = pd.DataFrame(info)
today = datetime.today().strftime('%m-%d')
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{subFolder}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)
In [ ]: