In [3]:
from Bio.PDB import *
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# from .. import notebookFunctions

%matplotlib inline
plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

average of 20 proteins


In [4]:
data = np.zeros((181,181))
for i in range(20):
    parser = PDBParser()
    target = f"low_e_jun01_h56/structure_{i}.pdb"
    structure = parser.get_structure('target', target)
    # Assume only one chain
    model = structure[0]
    # chain = model['A']
    chain = model.child_list[0]
    # with open("contact.csv", "w") as out:
    #     out.write("i, j, Distance\n")
    for i_residue in chain:
#         print(i)
        is_regular_res = i_residue.has_id('CA')
        # print(i_residue, is_regular_res)
        if i_residue.get_resname() == "GLY":
            res_i = i_residue['CA']
        else:
            res_i = i_residue['CB']
        for j_residue in chain:
            if j_residue.get_resname() == "GLY":
                res_j = j_residue['CA']
            else:
                res_j = j_residue['CB']
            distance = (res_i.get_coord()[0] - res_j.get_coord()[0])**2
            distance += (res_i.get_coord()[1] - res_j.get_coord()[1])**2
            distance += (res_i.get_coord()[2] - res_j.get_coord()[2])**2
            distance = distance**0.5
    #         print(res_i.get_coord()[0], res_j.get_coord()[0], res_i.get_coord()[0] - res_j.get_coord()[0])
            i = i_residue.id[1]
            j = j_residue.id[1]
            data[i-1,j-1] += distance
            out_line = str(i) + ", " + str(j) + ", " + str(distance) + "\n"
    #         out.write(out_line)
    #         print(out_line)
data /= 20

In [5]:
plt.imshow(data < 7)


Out[5]:
<matplotlib.image.AxesImage at 0x112917588>

In [49]:
# one protein
data = np.zeros((181,181))
parser = PDBParser()
target = "/Users/weilu/Research/data/03_week/low_e_jun01_h56/structure_0.pdb"
structure = parser.get_structure('target', target)
# Assume only one chain
model = structure[0]
# chain = model['A']
chain = model.child_list[0]
# with open("contact.csv", "w") as out:
#     out.write("i, j, Distance\n")
for i_residue in chain:
    is_regular_res = i_residue.has_id('CA')
    # print(i_residue, is_regular_res)
    if i_residue.get_resname() == "GLY":
        res_i = i_residue['CA']
    else:
        res_i = i_residue['CB']
    for j_residue in chain:
        if j_residue.get_resname() == "GLY":
            res_j = j_residue['CA']
        else:
            res_j = j_residue['CB']
        distance = (res_i.get_coord()[0] - res_j.get_coord()[0])**2
        distance += (res_i.get_coord()[1] - res_j.get_coord()[1])**2
        distance += (res_i.get_coord()[2] - res_j.get_coord()[2])**2
        distance = distance**0.5
#         print(res_i.get_coord()[0], res_j.get_coord()[0], res_i.get_coord()[0] - res_j.get_coord()[0])
        i = i_residue.id[1]
        j = j_residue.id[1]
        data[i-1,j-1] = distance
        out_line = str(i) + ", " + str(j) + ", " + str(distance) + "\n"
#         out.write(out_line)
#         print(out_line)

In [50]:
plt.imshow(data < 7)


Out[50]:
<matplotlib.image.AxesImage at 0x11eff9da0>

In [ ]: