In [45]:
from Bio.PDB import PDBList
from pdbfixer import PDBFixer
from simtk.openmm.app import PDBFile

In [16]:


In [35]:
fromFile = '/Users/weilu/Research/server/may_week2_2020/hemoglobin/source/5x2r.pdb'
fixer = PDBFixer(filename=fromFile)

chains = list(fixer.topology.chains())
Chosen_chain = "ABCD"

chains_to_remove = [i for i, x in enumerate(chains) if x.id not in Chosen_chain]

fixer.removeChains(chains_to_remove)

fixer.findMissingResidues()
# add missing residues in the middle of a chain, not ones at the start or end of the chain.
chains = list(fixer.topology.chains())
keys = fixer.missingResidues.keys()
if True:
    print("chains to remove", chains_to_remove)
    print("missing residues: ", keys)
if not True:
    for key in list(keys):
        del fixer.missingResidues[key]
else:
    if True:
        for key in list(keys):
            chain_tmp = chains[key[0]]
            if key[1] == 0 or key[1] == len(list(chain_tmp.residues())):
                del fixer.missingResidues[key]

fixer.findNonstandardResidues()
fixer.replaceNonstandardResidues()
# fixer.removeHeterogens(keepWater=False)
fixer.findMissingAtoms()
try:
    fixer.addMissingAtoms()
except:
    print("Unable to add missing atoms")

fixer.addMissingHydrogens(7.0)
PDBFile.writeFile(fixer.topology, fixer.positions, open('/Users/weilu/Research/server/may_week2_2020/hemoglobin/setups/four_chains/test.pdb', 'w'), keepIds=False)


chains to remove [4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23]
missing residues:  dict_keys([(0, 0), (1, 0), (2, 0), (3, 0)])

In [33]:
a = list(fixer.topology.chains())

In [27]:
b = a[-1]

In [29]:
list(b.residues())


Out[29]:
[<Residue 573 (HNI) of chain 7>]

In [43]:
removeHeterogens = False if keep is True else True

In [44]:
removeHeterogens


Out[44]:
True

In [41]:
keep = True

In [42]:
keep = False

In [54]:
import sys
sys.path.insert(0,'/Users/weilu/opt/')
from small_script.myFunctions import *
import matplotlib.pyplot as plt

In [56]:
%matplotlib inline

In [58]:
plt.rcParams['figure.figsize'] = np.array([16.18033, 10])    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})

In [51]:
fastaFile = "/Users/weilu/Research/server/may_week2_2020/disulfide_bond/setups/1ppb_H/1ppb_H.fasta"
seq = getSeqFromFasta(fastaFile)
n =len(seq)

In [55]:
fileName = f"/Users/weilu/Research/server/may_week2_2020/disulfide_bond/setups/1ppb_H/1ppb_H.pdb"
crystal = getContactMapFromPDB(fileName, n)
n = crystal.shape[0]
dmp, _ = get_contactFromDMP(f"/Users/weilu/Research/server/may_week2_2020/disulfide_bond/DMP/1ppb_H/1ppb_H.deepmetapsicov.con", n)


Out[55]:
<matplotlib.image.AxesImage at 0x119bd43c8>

In [61]:
combined = (crystal>0.5).astype(int) + 2*(dmp>0.5).astype(int)
from matplotlib import colors
cmap = colors.ListedColormap(['white', 'red', 'blue', 'lime'])
bounds=[-1,0.01, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)


Out[61]:
<matplotlib.image.AxesImage at 0x1198f55f8>

In [60]:
combined = (crystal>0.5).astype(int) + 2*(dmp>0.3).astype(int)
from matplotlib import colors
cmap = colors.ListedColormap(['white', 'red', 'blue', 'lime'])
bounds=[-1,0.01, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)


Out[60]:
<matplotlib.image.AxesImage at 0x119b95588>

In [ ]:
plt.savefig(f"figures/contact_{i}_cutoff_{DMP_cutoff}.png", dpi=300)

In [ ]:


In [63]:
data = pd.read_csv("/Users/weilu/Dropbox/openAWSEM/data/complete_run_without_er_gamma_yesCysCys_12-02.csv", index_col=0)
a = pd.read_csv("/Users/weilu/Dropbox/openAWSEM/data/length_info_12-01.csv", index_col=0)
data = data.merge(a, on="Protein")


/Users/weilu/anaconda/lib/python3.6/site-packages/numpy/lib/arraysetops.py:569: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  mask |= (ar1 == a)

In [ ]:


In [ ]:
plt.rcParams['figure.figsize'] = 1*np.array([16.18033, 10])    #golden ratio
memory = 'ho'
filtered_data = data.query(f"memory == '{memory}'").query("k_cys != 'k_10' and k_cys != 'k_20' and k_cys != 'k_1'")
# filtered_data = data.query(f"Protein == '{protein}'").query(f"memory == '{memory}'").query("k_cys != 'k_10' and k_cys != 'k_20'")
Q_max = filtered_data.sort_values('Q').groupby(["k_cys", "Protein"]).tail(1).sort_values(['k_cys', "Q"])
Q_max.Protein = pd.Categorical(Q_max.Protein, a.sort_values("Length").Protein.values)
Q_max.k_cys = pd.Categorical(Q_max.k_cys,  ["original", "k_0", "k_1", "k_2", "k_5"], ordered=True)
# Q_max["Annealing Index"] = Q_max.groupby(["Submode"])["Q"].rank(method="first", ascending=False).astype(int)
# last = data.query("Steps > 350 and Folder in ['eighth_2_cys']")
Q_max["Scheme"] = Q_max["k_cys"]
# sns.lineplot("Annealing Index", "Q", markers=True, style="k_cys", data=Q_max, hue="k_cys", dashes=False)
ax = sns.lineplot("Protein", "Q", hue="Scheme", markers=True, data=Q_max.sort_values("k_cys").reset_index(drop=True), style="Scheme", dashes=False)
plt.ylabel("Best Q")
# _ = plt.xticks(np.arange(1, 21, 1))
# plt.title(f"structure prediction results of 5 Disufide rich proteins")
# plt.savefig(f"{figureFolder}/fig3_k_cys_complete.png", dpi=300)