In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [2]:
# Figure defaults for every plot in this notebook: a 16.18 x 10 inch
# (golden-ratio) canvas, white background, 100 dpi.
plt.rcParams.update({
    'figure.figsize': [16.18033, 10],
    'figure.facecolor': 'w',
    'figure.dpi': 100,
})
In [3]:
a = "/Users/weilu/Research/server/may_2019/multi_iter0/database/dompdb/1EG4A_89-207.pdb"
In [3]:
import glob
In [4]:
# Every domain PDB file of the multi_iter0 database.
# glob returns paths in an arbitrary, filesystem-dependent order; sorting keeps
# the subset seen by loops that stop early (e.g. `if i == 100: break`)
# identical from run to run.
a_list = sorted(glob.glob("/Users/weilu/Research/server/may_2019/multi_iter0/database/dompdb/*.pdb"))
In [16]:
len(a_list)
Out[16]:
In [71]:
# Accumulate a single histogram of per-residue CB density over every structure
# in a_list. Densities are clipped into [0, 10] first, so anything above 10 is
# counted in the last bin instead of being dropped by np.histogram.
n_bins = 100
density_range = (0, 10)

parser = PDBParser()
all_freq = np.zeros(n_bins)
for pdb_path in a_list:
    structure = parser.get_structure("x", pdb_path)
    res_list = get_res_list(structure)
    neighbor_list = get_neighbor_list(structure)
    cb_density = np.clip(calculate_cb_density(res_list, neighbor_list), *density_range)
    freq, bins = np.histogram(cb_density, bins=n_bins, range=density_range)
    all_freq += freq
In [72]:
# Keep the plain CB-density histogram under dedicated names, because
# `all_freq` and `bins` are reassigned by the next histogram cell.
cb_all_freq, cb_bins = all_freq, bins
In [80]:
# Accumulated CB-density counts, plotted at each bin's upper edge
# (cb_bins[1:]) and zoomed to the 0-5 part of the x axis.
plt.plot(cb_bins[1:], cb_all_freq)
plt.xlim(0,5)
Out[80]:
In [73]:
# Accumulate a histogram of the weighted CB density returned by
# calculate_cb_weight_density over every structure in a_list.
# Range is [0, largest] with ten bins per unit; values are clipped into that
# range so nothing falls outside the histogram.
parser = PDBParser()
largest = 20
n_bins = largest * 10
all_freq = np.zeros(n_bins)
for pdb_path in a_list:
    structure = parser.get_structure("x", pdb_path)
    res_list = get_res_list(structure)
    neighbor_list = get_neighbor_list(structure)
    cb_density = np.clip(calculate_cb_weight_density(res_list, neighbor_list), 0, largest)
    freq, bins = np.histogram(cb_density, bins=n_bins, range=(0, largest))
    all_freq += freq
In [74]:
# Keep the weighted-density histogram under dedicated names before
# `all_freq` / `bins` are reused.
weight_all_freq, weight_bins = all_freq, bins
In [81]:
# The first bin (index 0) is left out of the plot: counts from bin 1 onward
# are drawn at each bin's upper edge (weight_bins[2:]).
plt.plot(weight_bins[2:], weight_all_freq[1:])
Out[81]:
In [83]:
# Same curve with the first bin left out, zoomed to the 2-4 part of the x axis.
plt.plot(weight_bins[2:], weight_all_freq[1:])
plt.xlim(2,4)
Out[83]:
In [10]:
# Histogram of the density returned by calculate_cb_weight_density_2.
# Only the first 101 structures are processed (the loop stops once i == 100).
# Values are clipped into [0, largest] and binned into 200 equal-width bins.
parser = PDBParser()
largest = 600
n_bins = 200
all_freq = np.zeros(n_bins)
for i, pdb_path in enumerate(a_list):
    structure = parser.get_structure("x", pdb_path)
    res_list = get_res_list(structure)
    neighbor_list = get_neighbor_list(structure)
    cb_density = np.clip(calculate_cb_weight_density_2(res_list, neighbor_list), 0, largest)
    freq, bins = np.histogram(cb_density, bins=n_bins, range=(0, largest))
    all_freq += freq
    if i == 100:
        break
In [11]:
# Keep the second weighted-density histogram under dedicated names.
weight_2_all_freq, weight_2_bins = all_freq, bins
In [12]:
# The first bin (index 0) is left out of the plot: counts from bin 1 onward
# are drawn at each bin's upper edge (weight_2_bins[2:]).
plt.plot(weight_2_bins[2:], weight_2_all_freq[1:])
Out[12]:
In [5]:
def calculate_cb_weight_density_2(res_list, neighbor_list, min_seq_sep=2):
    """Per-residue neighbour density, weighted by each neighbour's side-chain weight.

    For every residue in ``res_list`` the function sums, over the neighbours
    returned by ``get_neighbors_within_radius(neighbor_list, residue, 9.0)``,
    the term ``w * interaction_well(rij, 4.5, 6.5, 5)``, where ``rij`` comes
    from ``get_interaction_distance`` and ``w`` is the neighbour's normalized
    side-chain weight. A neighbour only counts when its local index differs
    from the residue's by at least ``min_seq_sep`` or it is on another chain.

    The weight table is read from ``~/opt/parameters/amino_acid_side_chain_weight``
    (whitespace separated, ``#`` comments). ``w`` is ``weight - 56`` divided by
    the smallest ``weight - 56`` in the table, looked up by ``oneLetterCode``.

    Args:
        res_list: sequence of residues to compute the density for.
        neighbor_list: neighbour-search object passed through to
            ``get_neighbors_within_radius``.
        min_seq_sep: minimum local-index separation for same-chain pairs.

    Returns:
        numpy.ndarray of length ``len(res_list)``; entry ``k`` is the weighted
        density of ``res_list[k]``.

    Notes:
        A neighbour whose one-letter code is missing from the weight table is
        printed to stdout and contributes nothing. Previously the bare
        ``except`` printed and then fell through, so the sum used either an
        undefined ``weight`` (NameError) or the previous neighbour's weight.
    """
    # Raw string: "\s" is not a valid escape sequence in a normal str literal.
    weight_info = pd.read_csv("~/opt/parameters/amino_acid_side_chain_weight", comment="#", sep=r"\s+")
    side_chain_weight = weight_info["weight"] - 56
    normalized_weight_2 = side_chain_weight / side_chain_weight.min()
    # Build the lookup once; a DataFrame.query per neighbour pair is far slower.
    weight_by_code = {
        code: float(value)
        for code, value in zip(weight_info["oneLetterCode"], normalized_weight_2)
    }

    density = np.zeros(len(res_list))
    for res1globalindex, res1 in enumerate(res_list):
        res1index = get_local_index(res1)
        res1chain = get_chain(res1)
        for res2 in get_neighbors_within_radius(neighbor_list, res1, 9.0):
            res2index = get_local_index(res2)
            res2chain = get_chain(res2)
            # NOTE(review): the result is unused; the call is kept only because
            # get_global_index is defined elsewhere and may raise for residues
            # outside res_list. Confirm and drop it if it has no such effect.
            get_global_index(res_list, res2)
            # Skip same-chain neighbours that are too close in sequence.
            if abs(res2index - res1index) < min_seq_sep and res1chain == res2chain:
                continue
            rij = get_interaction_distance(res1, res2)
            res2type = three_to_one(res2.get_resname())
            weight = weight_by_code.get(res2type)
            if weight is None:
                # Unknown residue type: report it and leave it out of the sum.
                print(res2)
                print(res2type)
                continue
            density[res1globalindex] += weight * interaction_well(rij, 4.5, 6.5, 5)
    return density
In [100]:
# Build a per-residue table with the three density variants (plain, weighted,
# weighted_2) for the first 11 structures (the loop stops once i == 10), and
# accumulate the plain-density histogram alongside it.
parser = PDBParser()
all_freq = np.zeros(100)
largest = 20
all_ = []
for i, a in enumerate(a_list):
    structure = parser.get_structure("x", a)
    res_list = get_res_list(structure)
    neighbor_list = get_neighbor_list(structure)
    cb_density = calculate_cb_density(res_list, neighbor_list)
    cb_density = np.clip(cb_density, 0, 10)
    cb_density_weight = calculate_cb_weight_density(res_list, neighbor_list)
    cb_density_weight_2 = calculate_cb_weight_density_2(res_list, neighbor_list)
    # Clip the second weighted density itself (not cb_density_weight), so the
    # third column really holds the calculate_cb_weight_density_2 values.
    cb_density_weight_2 = np.clip(cb_density_weight_2, 0, 500)
    # One row per residue, one column per density variant.
    tmp = pd.DataFrame([cb_density, cb_density_weight, cb_density_weight_2]).T
    all_.append(tmp)
    freq, bins = np.histogram(cb_density, bins=100, range=(0, 10))
    all_freq += freq
    if i == 10:
        break
data = pd.concat(all_)
In [102]:
data.columns = ["density", "weight_density", "weight_density_2"]
In [103]:
data.plot.scatter("density", "weight_density")
Out[103]:
In [104]:
data.plot.scatter("density", "weight_density_2")
Out[104]:
In [106]:
plt.hist(data["weight_density_2"],bins=100)
Out[106]:
In [ ]:
In [ ]:
In [98]:
data.plot.scatter("density", "weight_density")
Out[98]:
In [22]:
# Smooth 0 -> 1 switch centred at dz = 15 with steepness k_bin, sampled on
# np.linspace's default 50 evenly spaced points in [-50, 50].
k_bin = 0.1
dz = np.linspace(-50, 50)
y = (1 + np.tanh((dz - 15) * k_bin)) / 2
In [23]:
plt.plot(dz, y)
Out[23]:
In [24]:
# Window built from two opposing tanh steps at dz = -memb_b and dz = +memb_b:
# highest at dz = 0 and decaying towards 0 well outside [-memb_b, memb_b].
memb_b = 15
rising_edge = np.tanh(k_bin * (dz + memb_b))
falling_edge = np.tanh(k_bin * (memb_b - dz))
y = 0.5 * (rising_edge + falling_edge)
plt.plot(dz, y)
Out[24]:
In [25]:
# Smooth 1 -> 0 switch centred at dz = -memb_b: close to 1 below it and
# close to 0 above it.
switch_arg = k_bin * (-memb_b - dz)
y = (1 + np.tanh(switch_arg)) / 2
plt.plot(dz, y)
Out[25]:
In [88]:
tmp =pd.DataFrame([cb_density, cb_density_weight]).T
In [89]:
pd.concat([tmp,tmp])
Out[89]:
In [ ]:
# Minimal np.histogram example with explicit integer bin edges -1, 0, ..., 10.
# The notebook imports numpy only under the alias `np`, so the bare `numpy`
# name is not available here.
values = np.arange(10, dtype=int)
bins = np.arange(-1, 11)
freq, bins = np.histogram(values, bins)
In [38]:
# Check how np.histogram treats samples outside `range`: -1 and 11 lie outside
# (0, 10) and are not counted, so only the value 1 contributes.
out_of_range_sample = [-1, 1, 11]
freq, bins = np.histogram(out_of_range_sample, bins=100, range=(0, 10))
In [33]:
freq.shape
Out[33]:
In [41]:
all_freq
Out[41]:
In [37]:
bins
Out[37]:
In [45]:
plt.plot(bins[1:], all_freq)
Out[45]:
In [46]:
plt.plot(bins[1:], all_freq)
plt.xlim(2,3)
Out[46]:
In [36]:
all_freq
Out[36]:
In [35]:
plt.plot(bins[1:], all_freq)
Out[35]:
In [29]:
plt.plot(bins[1:], all_freq)
Out[29]:
In [19]:
len(cb_density)
Out[19]:
In [12]:
_ = plt.hist(cb_density, bins=100)
In [ ]: