In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
# from small_script.myFunctions import *

import Bio.PDB as bio
d3_to_index = bio.Polypeptide.d3_to_index  # we may want to adjust this in the future.

plt.rcParams['figure.figsize'] = [16.18033, 10]

%matplotlib inline
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [4]:
# `pre` is the working directory that later cells write their outputs into;
# `database` is the sub-directory of input .pdb files that later cells list and parse.
pre = "/Users/weilu/Research/optimization/fragment/"
database = f"{pre}database/dompdb/"

In [24]:
# Two sample .pdb files used for interactive exploration in the cells below.
a, b = (
    f"/Users/weilu/Research/optimization/fragment/database/dompdb/{name}.pdb"
    for name in ("12asA00", "2a01C01")
)

In [ ]:


In [46]:
# Parse sample file `a` into a Biopython structure registered under the id "x".
parser = bio.PDBParser()
structure = parser.get_structure(id="x", file=a)

In [31]:
# Index the parsed structure by 0 (first model) and then by "A" (chain id); keep the result in `c`.
c = structure[0]["A"]

In [49]:
# Print the sequence of every polypeptide that CaPPBuilder finds in `structure`.
ppb = bio.CaPPBuilder()
peptides = ppb.build_peptides(structure)
for pp in peptides:
    sequence = pp.get_sequence()
    print(sequence)


AYIAKQRQISFVKSHFSRQLEERLGLIEVQAPILSRVGDGTQDNLSGAEKAVQVKVKALPDAQFEVVHSLAKWKRQTLGQHDFSAGEGLYTHMKALRPDEDRLSPLHSVYVDQWDWERVMGDGERQFSTLKSTVEAIWAGIKATEAAVSEEFGLAPFLPDQIHFVHSQELLSRYPDLDAKGRERAIAKDLGAVFLVGIGGKLSDGHRHDVRAPDYDDWSTPSELGHAGLNGDILVWNPVLEDAFELSSMGIRVDADTLKHQLALTGDEDRLELEWHQALLRGEMPQTIGGGIGQSRLTMLLLQLPHIGQVQAGVWPAAVRESVPSLL

In [51]:
# Bare `pwd` is resolved by IPython (automagic); the recorded output is the notebook's working directory.
pwd


Out[51]:
'/Users/weilu/opt/notebook/Optimization'

Complete version: all CA and CB distances of a fragment are written on one row.


In [35]:
# Fragment distance table: one CSV per input file, written to <pre>/processed_5/.
#
# Each row describes one window of `max_seq_sep` consecutively numbered residues:
#   pdbId, index of the window's first residue within its chain, one-letter sequence,
#   then the CA-CA, CA-CB, CB-CA and CB-CB distances (in that order) for every pair
#   of window positions that are at least `min_seq_sep` apart.
# A residue that has no CB atom contributes its CA atom to the "CB" columns.
# (Original author's note: the H of GLY is replaced with CB in this dataset.)
#
# Windows containing a residue without a CA atom are not written; the pdbId is
# printed together with the residue number of the window's first residue and
# appended to the skipped-fragment log.
min_seq_sep = 3
max_seq_sep = 9
parser = bio.PDBParser(QUIET=True)
# NOTE(review): `three_to_one` appears to be deprecated in newer Biopython releases -
# confirm that the installed version still provides it.
three_to_one = bio.Polypeptide.three_to_one

processed_dir = os.path.join(pre, "processed_5")
skipped_log = os.path.join(pre, "without_discontinues_and_gly_exception_2")
os.makedirs(processed_dir, exist_ok=True)

# Window-position pairs in output column order.
# With max_seq_sep=9 and min_seq_sep=3 these are the 21 pairs (0,3), (0,4), ..., (5,8).
window_pairs = [
    (first, second)
    for first in range(max_seq_sep - min_seq_sep)
    for second in range(first + min_seq_sep, max_seq_sep)
]


def _cb_or_ca(residue):
    """Return the residue's CB atom, or its CA atom when it has no CB."""
    try:
        return residue["CB"]
    except KeyError:
        return residue["CA"]


pdb_list = os.listdir(database)
# pdb_list = ['1w5cC02.pdb']  # uncomment to debug a single file
for cc, pdb in enumerate(pdb_list):
    pdbId = pdb.split('.')[0]
    if cc % 200 == 0:
        print(cc)  # coarse progress indicator
    with open(os.path.join(processed_dir, f"{pdbId}.csv"), "w") as out:
        structure = parser.get_structure("x", os.path.join(database, pdb))
        for model in structure:
            for chain in model:
                all_residues = list(chain)
                # Only full-length windows are visited; a chain shorter than
                # max_seq_sep yields an empty range.
                for i in range(len(all_residues) - max_seq_sep + 1):
                    frag = all_residues[i:i + max_seq_sep]
                    resseq_list = [res.get_id()[1] for res in frag]
                    # Skip windows whose residue numbers do not increase by exactly 1.
                    if not np.all(np.ediff1d(resseq_list) == 1):
                        continue
                    try:
                        ca_atoms = [res["CA"] for res in frag]
                    except KeyError:
                        # Every window position takes part in at least one pair,
                        # so a single missing CA invalidates the whole window.
                        print(pdbId, resseq_list[0])
                        with open(skipped_log, "a") as log:
                            log.write(f"{pdbId}\n")
                        continue
                    cb_atoms = [_cb_or_ca(res) for res in frag]

                    # Subtracting two Biopython atoms gives their distance.
                    dis_ca_ca = [str(ca_atoms[p] - ca_atoms[q]) for p, q in window_pairs]
                    dis_ca_cb = [str(ca_atoms[p] - cb_atoms[q]) for p, q in window_pairs]
                    dis_cb_ca = [str(cb_atoms[p] - ca_atoms[q]) for p, q in window_pairs]
                    dis_cb_cb = [str(cb_atoms[p] - cb_atoms[q]) for p, q in window_pairs]

                    fragSeq = "".join(three_to_one(res.get_resname()) for res in frag)
                    fields = [pdbId, str(i), fragSeq,
                              *dis_ca_ca, *dis_ca_cb, *dis_cb_ca, *dis_cb_cb]
                    out.write(",".join(fields) + "\n")


0
200
400
2yqyA00 143
2yqyA00 144
2yqyA00 145
2yqyA00 146
2yqyA00 147
2yqyA00 148
2yqyA00 149
2yqyA00 150
2yqyA00 151
600
800
1000
1200
1400
1600
1800
2000
2200
2400
1cm3A00 2
1cm3A00 3
1cm3A00 4
1cm3A00 5
1cm3A00 6
1cm3A00 7
1cm3A00 8
1cm3A00 9
1cm3A00 10
2600
2800
3000
3200
3400
3600
3800
4000
4200
4400
4600
4800
5000
5200
5400
5600
5800
6000
6200
6400
6600
6800
7000
7200
7400
7600
7800
8000
8200
8400
8600
8800
9000
9200
9400
9600
9800
10000
10200
10400
2uvgA01 330
2uvgA01 331
2uvgA01 332
2uvgA01 333
2uvgA01 334
2uvgA01 335
2uvgA01 336
2uvgA01 337
2uvgA01 338
10600
10800
11000
11200
11400
11600
11800
12000
12200
12400
12600
12800
13000
13200
13400
13600
13800
14000
14200
14400

In [37]:
# Merge the per-file CSVs from processed_5/ into one table, data_jan31.csv.
# Header: pdb, i, seq, then 21 numbered columns for each of caca, cacb, cbca, cbcb.
headline = ",".join(
    ["pdb", "i", "seq"]
    + [f"{kind}_{k}" for kind in ("caca", "cacb", "cbca", "cbcb") for k in range(1, 22)]
)

processed_dir = os.path.join(pre, "processed_5")
combined_path = os.path.join(pre, "data_jan31.csv")

# Only .csv entries are merged (a stray non-CSV file in the folder would otherwise
# end up inside the table); sorting makes the row order reproducible.
pdb_list = sorted(name for name in os.listdir(processed_dir) if name.endswith(".csv"))

# Plain file I/O instead of one `cat` subprocess per input file.
# "wb" truncates, so re-running the cell rebuilds the table from scratch.
with open(combined_path, "wb") as combined:
    combined.write((headline + "\n").encode())
    for pdb in pdb_list:
        with open(os.path.join(processed_dir, pdb), "rb") as part:
            combined.write(part.read())

In [28]:
# Fragment distance table: one CSV per input file, written to <pre>/processed_4/.
#
# Each row describes one window of `max_seq_sep` consecutively numbered residues:
#   pdbId, index of the window's first residue within its chain, one-letter sequence,
#   then the CA-CA, CA-CB, CB-CA and CB-CB distances (in that order) for every pair
#   of window positions that are at least `min_seq_sep` apart.
# A window is dropped, and its pdbId appended to the skipped-fragment log, as soon
# as any of its residues lacks a CA or a CB atom (there is no CA fallback here).
# (Original author's note: the H of GLY is replaced with CB in this dataset.)
min_seq_sep = 3
max_seq_sep = 9
parser = bio.PDBParser(QUIET=True)
three_to_one = bio.Polypeptide.three_to_one

processed_dir = os.path.join(pre, "processed_4")
skipped_log = os.path.join(pre, "without_discontinues_and_gly_exception_2")
os.makedirs(processed_dir, exist_ok=True)

# Window-position pairs in output column order.
# With max_seq_sep=9 and min_seq_sep=3 these are the 21 pairs (0,3), (0,4), ..., (5,8).
window_pairs = [
    (first, second)
    for first in range(max_seq_sep - min_seq_sep)
    for second in range(first + min_seq_sep, max_seq_sep)
]

pdb_list = os.listdir(database)
# pdb_list = ['1w5cC02.pdb']  # uncomment to debug a single file
for cc, pdb in enumerate(pdb_list):
    pdbId = pdb.split('.')[0]
    if cc % 200 == 0:
        print(cc)  # coarse progress indicator
    with open(os.path.join(processed_dir, f"{pdbId}.csv"), "w") as out:
        structure = parser.get_structure("x", os.path.join(database, pdb))
        for model in structure:
            for chain in model:
                all_residues = list(chain)
                # Only full-length windows are visited; a chain shorter than
                # max_seq_sep yields an empty range.
                for i in range(len(all_residues) - max_seq_sep + 1):
                    frag = all_residues[i:i + max_seq_sep]
                    resseq_list = [res.get_id()[1] for res in frag]
                    # Skip windows whose residue numbers do not increase by exactly 1.
                    if not np.all(np.ediff1d(resseq_list) == 1):
                        continue
                    try:
                        # Every window position takes part in at least one pair,
                        # so all residues need both atoms.
                        ca_atoms = [res["CA"] for res in frag]
                        cb_atoms = [res["CB"] for res in frag]
                    except KeyError:
                        with open(skipped_log, "a") as log:
                            log.write(f"{pdbId}\n")
                        continue

                    # Subtracting two Biopython atoms gives their distance.
                    dis_ca_ca = [str(ca_atoms[p] - ca_atoms[q]) for p, q in window_pairs]
                    dis_ca_cb = [str(ca_atoms[p] - cb_atoms[q]) for p, q in window_pairs]
                    dis_cb_ca = [str(cb_atoms[p] - ca_atoms[q]) for p, q in window_pairs]
                    dis_cb_cb = [str(cb_atoms[p] - cb_atoms[q]) for p, q in window_pairs]

                    fragSeq = "".join(three_to_one(res.get_resname()) for res in frag)
                    fields = [pdbId, str(i), fragSeq,
                              *dis_ca_ca, *dis_ca_cb, *dis_cb_ca, *dis_cb_cb]
                    out.write(",".join(fields) + "\n")


0
200
400
600
800
1000
1200
1400
1600
1800
2000
2200
2400
2600
2800
3000
3200
3400
3600
3800
4000
4200
4400
4600
4800
5000
5200
5400
5600
5800
6000
6200
6400
6600
6800
7000
7200
7400
7600
7800
8000
8200
8400
8600
8800
9000
9200
9400
9600
9800
10000
10200
10400
10600
10800
11000
11200
11400
11600
11800
12000
12200
12400
12600
12800
13000
13200
13400
13600
13800
14000
14200
14400

In [24]:


In [25]:
# Display the CSV header string currently bound to `headline`.
headline


Out[25]:
'pdb,i,seq,caca_1,caca_2,caca_3,caca_4,caca_5,caca_6,caca_7,caca_8,caca_9,caca_10,caca_11,caca_12,caca_13,caca_14,caca_15,caca_16,caca_17,caca_18,caca_19,caca_20,caca_21,cacb_1,cacb_2,cacb_3,cacb_4,cacb_5,cacb_6,cacb_7,cacb_8,cacb_9,cacb_10,cacb_11,cacb_12,cacb_13,cacb_14,cacb_15,cacb_16,cacb_17,cacb_18,cacb_19,cacb_20,cacb_21,cbca_1,cbca_2,cbca_3,cbca_4,cbca_5,cbca_6,cbca_7,cbca_8,cbca_9,cbca_10,cbca_11,cbca_12,cbca_13,cbca_14,cbca_15,cbca_16,cbca_17,cbca_18,cbca_19,cbca_20,cbca_21,cbcb_1,cbcb_2,cbcb_3,cbcb_4,cbcb_5,cbcb_6,cbcb_7,cbcb_8,cbcb_9,cbcb_10,cbcb_11,cbcb_12,cbcb_13,cbcb_14,cbcb_15,cbcb_16,cbcb_17,cbcb_18,cbcb_19,cbcb_20,cbcb_21'

In [29]:



Out[29]:
0

In [30]:
# Merge the per-file CSVs from processed_4/ into one table, data_jan20.csv.
# Header: pdb, i, seq, then 21 numbered columns for each of caca, cacb, cbca, cbcb.
headline = ",".join(
    ["pdb", "i", "seq"]
    + [f"{kind}_{k}" for kind in ("caca", "cacb", "cbca", "cbcb") for k in range(1, 22)]
)

processed_dir = os.path.join(pre, "processed_4")
combined_path = os.path.join(pre, "data_jan20.csv")

# Only .csv entries are merged (a stray non-CSV file in the folder would otherwise
# end up inside the table); sorting makes the row order reproducible.
pdb_list = sorted(name for name in os.listdir(processed_dir) if name.endswith(".csv"))

# Plain file I/O instead of one `cat` subprocess per input file.
# "wb" truncates, so re-running the cell rebuilds the table from scratch.
with open(combined_path, "wb") as combined:
    combined.write((headline + "\n").encode())
    for pdb in pdb_list:
        with open(os.path.join(processed_dir, pdb), "rb") as part:
            combined.write(part.read())

Another format: four rows per fragment, one for each distance type (CA-CA, CA-CB, CB-CA, CB-CB), each holding all distances of that type.


In [ ]:
# Fragment distance table in "long" form: one CSV per input file in <pre>/processed_3/.
#
# Each window of `max_seq_sep` consecutively numbered residues produces FOUR rows,
# all starting with pdbId, window start index and one-letter sequence, followed by
# the distances of one type: CA-CA, then CA-CB, then CB-CA, then CB-CB.
# Distances are listed for every pair of window positions at least `min_seq_sep` apart.
# A window is dropped, and its pdbId appended to the skipped-fragment log, as soon
# as any of its residues lacks a CA or a CB atom.
# (Original author's note: the H of GLY is replaced with CB in this dataset.)
min_seq_sep = 3
max_seq_sep = 9
parser = bio.PDBParser(QUIET=True)
three_to_one = bio.Polypeptide.three_to_one

processed_dir = os.path.join(pre, "processed_3")
skipped_log = os.path.join(pre, "without_discontinues_and_gly_exception_2")
os.makedirs(processed_dir, exist_ok=True)

# Window-position pairs in output column order.
# With max_seq_sep=9 and min_seq_sep=3 these are the 21 pairs (0,3), (0,4), ..., (5,8).
window_pairs = [
    (first, second)
    for first in range(max_seq_sep - min_seq_sep)
    for second in range(first + min_seq_sep, max_seq_sep)
]

pdb_list = os.listdir(database)
# pdb_list = ['1w5cC02.pdb']  # uncomment to debug a single file
for cc, pdb in enumerate(pdb_list):
    pdbId = pdb.split('.')[0]
    if cc % 200 == 0:
        print(cc)  # coarse progress indicator
    with open(os.path.join(processed_dir, f"{pdbId}.csv"), "w") as out:
        structure = parser.get_structure("x", os.path.join(database, pdb))
        for model in structure:
            for chain in model:
                all_residues = list(chain)
                # Only full-length windows are visited; a chain shorter than
                # max_seq_sep yields an empty range.
                for i in range(len(all_residues) - max_seq_sep + 1):
                    frag = all_residues[i:i + max_seq_sep]
                    resseq_list = [res.get_id()[1] for res in frag]
                    # Skip windows whose residue numbers do not increase by exactly 1.
                    if not np.all(np.ediff1d(resseq_list) == 1):
                        continue
                    try:
                        # Every window position takes part in at least one pair,
                        # so all residues need both atoms.
                        ca_atoms = [res["CA"] for res in frag]
                        cb_atoms = [res["CB"] for res in frag]
                    except KeyError:
                        with open(skipped_log, "a") as log:
                            log.write(f"{pdbId}\n")
                        continue

                    fragSeq = "".join(three_to_one(res.get_resname()) for res in frag)
                    row_prefix = f"{pdbId},{i},{fragSeq},"
                    # One row per distance type; subtracting two atoms gives their distance.
                    for left, right in ((ca_atoms, ca_atoms), (ca_atoms, cb_atoms),
                                        (cb_atoms, ca_atoms), (cb_atoms, cb_atoms)):
                        distances = [str(left[p] - right[q]) for p, q in window_pairs]
                        out.write(row_prefix + ",".join(distances) + "\n")


0
200
400
600
800
1000
1200
1400
1600
1800
2000
2200
2400
2600
2800
3000
3200
3400
3600
3800
4000
4200
4400
4600
4800
5000
5200
5400
5600
5800
6000
6200
6400
6600
6800
7000
7200
7400
7600
7800
8000
8200
8400
8600
8800
9000
9200
9400
9600
9800
10000
10200
10400
10600
10800
11000
11200
11400
11600
11800
12000
12200
12400
12600
12800
13000
13200
13400
13600
13800

In [90]:
# Pairwise distance table: one CSV per input file, written to <pre>/processed_2/.
#
# For residue i of a chain, the partners are the residues at chain positions
# i+min_seq_sep .. i+max_seq_sep. One row is produced per (residue, partner) pair:
#   pdbId, i, j (partner offset, starting at 0), residue name, partner name,
#   CA-CA, CA-CB, CB-CA, CB-CB distance.
# The rows of residue i are written only if EVERY partner passes two checks:
#   * its residue number equals residue i's number + min_seq_sep + j, and
#   * both residues have CA and CB atoms (otherwise the pdbId is appended to the log).
# Near the chain end the partner list is simply shorter.
# (Original author's note: the H of GLY is replaced with CB in this dataset.)
min_seq_sep = 3
max_seq_sep = 9
parser = bio.PDBParser(QUIET=True)

processed_dir = os.path.join(pre, "processed_2")
skipped_log = os.path.join(pre, "without_discontinues_and_gly_exception")
os.makedirs(processed_dir, exist_ok=True)

pdb_list = os.listdir(database)
# pdb_list = ['1w5cC02.pdb']  # uncomment to debug a single file
for cc, pdb in enumerate(pdb_list):
    pdbId = pdb.split('.')[0]
    if cc % 200 == 0:
        print(cc)  # coarse progress indicator
    with open(os.path.join(processed_dir, f"{pdbId}.csv"), "w") as out:
        structure = parser.get_structure("x", os.path.join(database, pdb))
        for model in structure:
            for chain in model:
                all_residues = list(chain)
                for i, residue in enumerate(all_residues):
                    first_resseq = residue.get_id()[1]
                    partners = all_residues[i + min_seq_sep:i + max_seq_sep + 1]
                    rows = []
                    for j, r2 in enumerate(partners):
                        # Residue numbering must continue without a gap or jump.
                        if r2.get_id()[1] != first_resseq + min_seq_sep + j:
                            break
                        try:
                            # Subtracting two Biopython atoms gives their distance.
                            dis_ca_ca = residue["CA"] - r2["CA"]
                            dis_ca_cb = residue["CA"] - r2["CB"]
                            dis_cb_ca = residue["CB"] - r2["CA"]
                            dis_cb_cb = residue["CB"] - r2["CB"]
                        except KeyError:
                            with open(skipped_log, "a") as log:
                                log.write(f"{pdbId}\n")
                            break
                        rows.append(f"{pdbId},{i},{j},{residue.get_resname()},{r2.get_resname()},{dis_ca_ca},{dis_ca_cb},{dis_cb_ca},{dis_cb_cb}\n")
                    else:
                        # Reached only when no partner triggered a `break`.
                        out.writelines(rows)


0
200
400
600
800
1000
1200
1400
1600
1800
2000
2200
2400
2600
2800
3000
3200
3400
3600
3800
4000
4200
4400
4600
4800
5000
5200
5400
5600
5800
6000
6200
6400
6600
6800
7000
7200
7400
7600
7800
8000
8200
8400
8600
8800
9000
9200
9400
9600
9800
10000
10200
10400
10600
10800
11000
11200
11400
11600
11800
12000
12200
12400
12600
12800
13000
13200
13400
13600
13800
14000
14200
14400

In [95]:
# Combine all per-file CSVs from processed_2/ into data_jan14.csv.
processed_dir = os.path.join(pre, "processed_2")
combined_path = os.path.join(pre, "data_jan14.csv")

# Only .csv entries are merged (a stray non-CSV file in the folder would otherwise
# end up inside the table); sorting makes the row order reproducible.
pdb_list = sorted(name for name in os.listdir(processed_dir) if name.endswith(".csv"))

# Append mode mirrors the original `cat ... >>`: existing content of the target is
# kept and no header row is written here.
# NOTE(review): re-running this cell appends every row a second time - confirm
# whether the target should be truncated first.
with open(combined_path, "ab") as combined:
    for pdb in pdb_list:
        with open(os.path.join(processed_dir, pdb), "rb") as part:
            combined.write(part.read())

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [68]:
# First version of the pairwise distance table: one CSV per input file in <pre>/processed/.
#
# For residue i of a chain, one row is written per partner at chain positions
# i+min_seq_sep .. i+max_seq_sep:
#   pdbId, i, j (partner offset, starting at 0), residue name, partner name,
#   CA-CA, CA-CB, CB-CA, CB-CB distance.
# Residue numbering is not checked here. The first error raised while processing a
# file (the recorded output shows missing 'CA'/'CB' atoms) stops that file: the error
# is printed and the pdbId appended to the error log. Rows written before the error
# stay in the file.
# (Original author's note: the H of GLY is replaced with CB in this dataset.)
min_seq_sep = 3
max_seq_sep = 9
parser = bio.PDBParser(QUIET=True)

processed_dir = os.path.join(pre, "processed")
error_log = os.path.join(pre, "error_message")
os.makedirs(processed_dir, exist_ok=True)

pdb_list = os.listdir(database)
for cc, pdb in enumerate(pdb_list):
    pdbId = pdb.split('.')[0]
    if cc % 1000 == 0:
        print(cc)  # coarse progress indicator
    with open(os.path.join(processed_dir, f"{pdbId}.csv"), "w") as out:
        structure = parser.get_structure("x", os.path.join(database, pdb))
        try:
            for model in structure:
                for chain in model:
                    all_residues = list(chain)
                    for i, residue in enumerate(all_residues):
                        partners = all_residues[i + min_seq_sep:i + max_seq_sep + 1]
                        for j, r2 in enumerate(partners):
                            # Subtracting two Biopython atoms gives their distance.
                            dis_ca_ca = residue["CA"] - r2["CA"]
                            dis_ca_cb = residue["CA"] - r2["CB"]
                            dis_cb_ca = residue["CB"] - r2["CA"]
                            dis_cb_cb = residue["CB"] - r2["CB"]
                            out.write(f"{pdbId},{i},{j},{residue.get_resname()},{r2.get_resname()},{dis_ca_ca},{dis_ca_cb},{dis_cb_ca},{dis_cb_cb}\n")
        except Exception as ex:
            # Deliberately broad: one bad file must not stop the whole run.
            print(pdbId, ex)
            with open(error_log, "a") as log:
                log.write(f"{pdbId}\n")


0
1w5cC02 'CB'
1x6zA00 'CA'
2vqeL00 'CB'
3fhxA00 'CB'
2j3tB00 'CB'
1fbsA00 'CB'
1w97L01 'CB'
2yqyA00 'CA'
2uxeA00 'CB'
2p7hA00 'CB'
2xseA00 'CB'
1ur3M00 'CB'
1000
2j01100 'CB'
2y43A00 'CB'
1c5aA00 'CB'
1w79A01 'CB'
3qtaA00 'CB'
1vkbA00 'CB'
2iu1A00 'CB'
5a1nB00 'CA'
1ofuX00 'CB'
4avrA00 'CB'
1ofcX04 'CB'
2000
1grlA03 'CB'
1o9nA00 'CB'
2bz0A00 'CB'
3iteA00 'CB'
1cm3A00 'CA'
2wrzA02 'CB'
2wp8A00 'CB'
1usdA00 'CB'
2vyrA00 'CB'
3000
2cmrA00 'CB'
2c81A02 'CB'
2iu5B00 'CB'
2bzvA00 'CA'
1urjA03 'CB'
3vqjA00 'CA'
1pu6A01 'CB'
2c7fE01 'CB'
1w3fA02 'CB'
1cy5A00 'CB'
1i24A01 'CB'
4000
3gb8B00 'CB'
3dmlA00 'CB'
5teeA01 'CB'
2g8lA01 'CB'
2qe8A00 'CB'
2vpzC00 'CB'
2xitA00 'CB'
4ghbA00 'CA'
2uuzB00 'CB'
2y38A02 'CB'
1obbA00 'CB'
3zfjA00 'CB'
2j16A00 'CB'
4ak2A02 'CB'
5000
2o0aA00 'CB'
3k0xA00 'CB'
2jd3A01 'CB'
2bs2B02 'CB'
5fpoA01 'CA'
2oh1A00 'CB'
3km5A00 'CB'
2vhoB01 'CB'
1bgwA03 'CB'
2vpzA05 'CB'
4mtlA00 'CB'
2w0iA00 'CB'
6000
2vo8A00 'CB'
1b8bA00 'CB'
4adiB04 'CA'
2w1kA00 'CB'
2vwaA00 'CB'
1o88A00 'CB'
2v3cC03 'CB'
1epaA00 'CB'
4bqnA02 'CB'
2cb1A02 'CB'
2jbxA00 'CB'
2rh2A00 'CA'
2wro400 'CB'
5m0pA00 'CB'
7000
2bkoA02 'CB'
2tbvA00 'CA'
1qdmA03 'CB'
4b6eB01 'CB'
1b89A00 'CB'
3fotA02 'CB'
2j6vA00 'CB'
1r4sA00 'CB'
2wp7A00 'CB'
1hxnA00 'CB'
5anpA00 'CB'
2cazE00 'CB'
3sonA00 'CB'
4hd9A02 'CB'
8000
1h3oA00 'CB'
3fotA01 'CB'
2blkA00 'CB'
2cfuA03 'CB'
5dnoA00 'CA'
2wryA00 'CB'
1v1qA00 'CB'
5kcoA03 'CB'
1ryqA00 'CB'
9000
3fcsA03 'CB'
1gvnD00 'CB'
1wa5C00 'CB'
10000
2c5qA00 'CB'
2vhtA01 'CB'
1o7dB00 'CB'
1ohuA00 'CB'
2wnwA01 'CB'
2bl7A00 'CA'
2uvgA01 'CA'
1oksA00 'CB'
2jbwA02 'CB'
2jlnA00 'CB'
11000
4d4eA00 'CB'
1o7dE00 'CB'
4nxiA00 'CB'
2j42A03 'CB'
1h9oA00 'CB'
4nhxA02 'CB'
1yq3C02 'CB'
3zqeB00 'CB'
12000
2fefA02 'CB'
2vxaA00 'CB'
1qftB00 'CB'
2f46A00 'CB'
3zqcA02 'CB'
2j01D03 'CB'
4g1mA04 'CB'
2vhoR00 'CB'
2ga1A01 'CA'
1ae9A00 'CB'
4ag4A02 'CB'
1hk7A02 'CB'
4nz1A01 'CA'
3eqcA02 'CB'
4bg7A00 'CB'
4ywqA00 'CB'
2x6rB01 'CB'
2cbzA00 'CB'
1upiA00 'CB'
13000
2xokI00 'CA'
2pw0B02 'CB'
1wa9B03 'CA'
1gwbA00 'CB'
4o61A00 'CB'
1w63Q00 'CB'
1qgrA00 'CB'
2vkxA02 'CB'
2y3qA00 'CB'
4qqgG00 'CB'
2vhoM02 'CB'
14000
1ohgA02 'CB'
1pp9T00 'CA'
1w1wE00 'CB'
3eniC00 'CB'
2ca6A00 'CB'
2vgeA00 'CB'
1h1cA01 'CB'
3tguH00 'CB'

In [ ]:
#                 if residue.get_resname() == "GLY":
    #                     print(list(residue), residue["CB"].get_vector())
    #                 print(i, j, dis_ca_ca, residue["CA"].get_vector(), r2["CA"].get_vector())

In [15]:
# For every chain of `structure`, list the polypeptide segments found by PPBuilder:
# position in the list, length, first and last residue, and the phi/psi list.
for model in structure:
    for chain in model:
        # The notebook imports `Bio.PDB as bio`; the bare name `Bio` is never bound,
        # so the builder has to be reached through the alias.
        polypeptides = bio.PPBuilder().build_peptides(chain)
        for poly_index, poly in enumerate(polypeptides):
            print(f"Model {model.id} Chain {chain.id}")
            print(f"(part {poly_index + 1} of {len(polypeptides)})")
            print(f"length {len(poly)}")
            print(f"from {poly[0].resname}{poly[0].id[1]}")
            print(f"to {poly[-1].resname}{poly[-1].id[1]}")
            print(poly.get_phi_psi_list())


Model 0 Chain A
(part 1 of 1)
length 327
from ALA4
to LEU330
[(None, 2.755865773347726), (-0.7263727222595183, -0.9913716409179782), (-0.9681369489362346, -0.8924334469455821), (-0.9499990608872447, -1.0182758578625297), (-0.9479768993361841, -0.6854089767784127), (-1.0712313384452894, -0.5852157016397055), (-1.5034558518765502, -0.564523215818365), (-0.9230340286794444, -0.9997785236781394), (-0.9384171785077553, -0.8810425939691152), (-0.9719864669835374, -0.7281216718353706), (-1.1281868035944738, -0.7835953932861571), (-1.0655430279228988, -0.946022335991926), (-1.0906293536226301, -0.5212158108961245), (-1.4045799619748414, -0.9362936255519249), (-0.9316345730833568, -0.8785416318071398), (-1.1752775054907985, -0.5029134798688377), (-1.0093567696173975, -0.7488781161412928), (-1.2294547581421433, -0.5736097301990386), (-1.031986559535758, -0.5985467212419624), (-1.3098925215060278, -0.6222877619230646), (-1.3030675451506892, -0.655380619450542), (-1.5284971999208499, -0.7071486532561408), (-1.3168361643172648, -0.5037695827496782), (-2.1557640717793847, 0.00458496406059547), (1.079141097464262, 0.7727908542272238), (-1.8179568433361748, 2.4013204609135625), (-1.7835726572946102, 2.6689005532236374), (-1.3365297124123507, 2.3396334252589313), (-2.169699273425646, 2.7071849194982462), (-1.2884371480681993, 2.253237831851062), (-1.5181143039558775, 2.40235103858361), (-1.1050310365718425, 2.862795099387136), (-2.2459536106143996, -0.48684774920312074), (-1.8633856365879766, 2.879026956803258), (-2.591972558329521, 2.6362543831790886), (-1.4145787246417796, 2.3912295525825167), (-1.2341577493497677, 2.311439660358874), (1.872224357788843, -0.14837749793365296), (-1.8510082488539685, 0.2471932816547138), (1.2431528454388525, -0.15101485245977472), (-1.66251369974552, 0.249249355661889), (-1.7774619902531028, 2.311635116684938), (-1.2686711840054525, 1.8933851566515352), (-1.5587134117504926, -0.14839943729017066), (1.127114474669416, -2.27197761089421), (-1.1472114469688404, -0.2283230545481784), (1.432127005794265, 
0.012051554617899357), (-1.7858626269354465, -0.28303967322233525), (-1.6966900538652467, 1.9875621215907084), (-1.2241163669096933, 2.3623093832677076), (-1.663019928746889, 3.0288517197645515), (-1.2403130526558068, 1.9979965944833304), (-1.3497802487041581, 2.2460835930549643), (-1.972447088330774, 2.3905288492417247), (-1.7285082712030113, 2.048249235756001), (-1.536464340133041, 2.1068485397567054), (-1.3542944212541017, -0.4210591174697601), (-1.2821258423547466, -0.40664835235585084), (-2.1481892095679225, 1.1325547097152673), (-1.2703565313443976, -0.34325432407141243), (-1.8820448731932968, 0.40041980966060153), (-2.098534399865577, 2.7149498853092684), (-1.9790667202012129, 2.5049368631011695), (-2.355608216078096, 2.6619071640682446), (-2.3802255738485614, 2.7159096871316186), (-1.4499853243519998, 2.529522483628629), (-1.3829576542143862, 2.5891608668473585), (-1.454634963852523, -0.5824444328018296), (3.0823449921416315, 2.863504614725687), (-1.5739683383919028, 0.3686396826393114), (-0.6375460621475025, -0.8757296418789521), (-1.6131158897685278, -0.6241843464401091), (-0.9047016400116621, -0.6276304660357414), (-1.2750527264770963, -0.6410378426490374), (-1.2749827825247273, -0.6580155954561565), (-1.1026001759963546, -0.7262418492477046), (-1.1519020434303446, -0.6252713919404208), (-1.2555462386558458, -0.7099675141980324), (-1.1205241522351694, -0.7027693518757456), (-1.1784099196180131, -0.7509772074534239), (-1.6579030559551964, 0.21831474928379765), (1.0749968454287724, 0.8503320872366655), (-1.1895014951934013, 2.5492865665526696), (-1.6543768248378892, 3.1357778871513964), (-1.4745017889284553, 2.321731280529887), (1.6766400744745509, 0.014801579551044917), (-2.0578724563694775, 2.372599823364104), (-2.569379663210434, 3.0679285903429547), (-2.1621527360764694, 2.771075074336215), (-2.495208861768309, 2.6806091463923436), (-2.2130230003791906, 2.8413996877623946), (-1.573096106092447, 1.5650394564443764), (-1.3135950264728518, 
2.3087696458703197), (-1.9678986312845153, 2.433038947108793), (-2.624887288086069, 2.7173272714219503), (-1.9443299858835288, 2.287594036759257), (-2.2101683023321765, 1.4123790013002315), (-1.3152805310484974, -0.036498189017470144), (-1.9262402433378463, 0.22719006587908605), (-0.9233937714989262, 2.406053675225235), (-1.5651650212369252, -0.7607959567008574), (-2.25854554672815, 2.073946879189327), (-1.2420358228908488, 2.4405449847103826), (-2.748287465336112, -3.100036169457371), (-0.9240486551113872, -0.23924325105133296), (-2.00096409150626, -0.6707910438110424), (-1.7718422544415746, 2.1940520256096407), (-1.9892486812507848, 2.931482327272971), (-1.3353024418664994, -0.45393738562352426), (-1.6284431616620083, 1.8480771334718837), (-1.8619048018032287, 2.081992682014634), (-2.0230365978814175, 2.585253304812523), (-2.6286399277500134, 2.5096923604540367), (-1.2646818985565456, 1.7144225138333062), (-1.6710458720271864, 2.0032946935002443), (-2.253795519172002, 2.753489553088419), (-2.609320683762885, 2.6216073766294485), (-2.4593803628335515, 2.3776090513736934), (-1.3664130053108108, 2.3966818033279607), (-1.6803116088647727, 2.752964020350542), (-1.9081073293542243, -3.0223565942580346), (-1.0563555734286931, 2.1760100420260984), (1.3352100587099351, 0.42198350845896926), (-1.7406023561233028, -0.07201240822000556), (-1.4053627877080563, 1.1590223095532113), (-2.752001695205313, 2.8918223344592477), (-1.091553775488701, -0.6182770216533574), (-0.9798743631783731, -0.49561190780223685), (-1.2562639433538112, -0.6826436005499775), (-1.1650349546893268, -0.8791792701855685), (-0.9640364074028176, -0.7599553976441191), (-1.2542442421495097, -0.8219877818878534), (-0.9896261727165321, -0.8103862776647602), (-1.0032548951575146, -0.5866376537161225), (-1.3457959544788125, -0.6550423395892508), (-1.0844131434427449, -0.798504566575689), (-1.0084089478750593, -0.6808391781080685), (-1.1816625471592, -0.6383797696570453), (-1.22788495518716, 
-0.3746474130760273), (-1.2919312876488864, -0.7217841835449496), (-1.1626594738512563, -0.8356675620470045), (-1.1622353432330979, -0.6003375944759428), (-0.913218966173604, -0.9343265743100466), (-1.1529450048568401, -0.5338866410145312), (-1.2519441941830816, -0.5124575602853405), (-1.3866256185050576, -0.6043384991413739), (-1.1839727297827811, -0.7604991285213014), (-1.0461413732750897, -0.6898570364521596), (-1.1716539158905204, 0.019065913666369384), (-2.456563807266459, -0.21960425356370353), (-1.3165935414688825, -0.8507430440790965), (-2.0081536187476776, 0.24232653696783008), (1.0847512126651249, 0.2727891960268986), (-1.3963667395900292, 2.1231957223877305), (-1.2057829061295289, 2.30172508368376), (-1.4840391434538769, 2.6942004131373083), (-2.5900819467467717, -0.32432352152102356), (-1.2127469684969536, 2.5579231471687023), (-0.9978451557159604, 2.610807680517679), (-1.2473986566710225, -0.4221083280346847), (-2.522559411860593, 2.679223623379933), (-1.7311950339670794, 2.630744146314685), (-2.054906244868834, 2.614262228702029), (-1.86120006335984, 2.1477645645682677), (-2.301291989070371, 2.180014606120523), (-1.251325436816499, 2.5463438407963856), (-1.0754532766349443, -0.5676552303447446), (-1.1821737033127793, -0.5440625528174041), (-1.532477650467144, -0.2709599526965621), (-1.1672594706451531, -0.860588318647775), (-1.0535096255732135, -0.5579869067192472), (-1.2045697217089504, -0.7268665133381793), (-1.3775588907304015, -0.35115948844087425), (-2.5658932054866526, 1.147034777408371), (-1.3275307304381812, -0.5480561432826806), (-1.2248479993894785, -0.771409690029247), (-1.1051775212497148, 2.68564956611545), (-1.4424327192977728, 2.9884068870452047), (-0.7895594323151384, -0.9321000676872014), (-1.232496484392179, -0.4806928152554353), (-1.373340377293235, -0.40130555035388554), (-1.287168951123467, -0.6458516450907664), (-1.2369934568630638, -0.6642484362921676), (-1.0617176358629157, -0.7294505705400857), (-1.1269983155201118, 
-0.9272618900738288), (-1.065540386186242, -0.5531686622448678), (-1.2427172076705786, -0.6785861203026722), (-1.26863916171731, -0.6198648797865248), (-1.1365118904910294, -0.835630767777841), (-1.8946169010296718, -0.49171789358647044), (1.4561238241212944, -0.42711113088457514), (-2.8948103562704826, 1.7587768236699444), (-2.071127638287508, 2.252773591435529), (-1.5058650061706835, 1.945444397386593), (-1.6223792884922, 1.786841878274881), (-1.3393549773282476, 2.5386362837492644), (1.42559756934558, 0.9745123770304939), (-1.7661333154741383, 1.751797018701088), (2.589896726585209, -0.5601983657613325), (-1.5484105722270765, 2.7053538357525446), (-1.3143254638635153, 2.1828671230628873), (-1.4729648955763122, -3.013713818150735), (-1.025889705470308, -0.5204065844912356), (-1.2419612676347096, -2.344986945273328), (-2.452484103290384, 0.6857270325735126), (-2.4028590267087058, 2.3290637317776053), (-1.0964086276678826, 2.8372566105094994), (-1.372013143431197, -0.9185334530455069), (-2.4412190157572495, 2.5292681957708285), (-0.8456581673626128, 2.457621175731862), (-2.5183727191191925, 2.485768712565173), (-1.1300053931176623, 2.5488374626056363), (-1.1129181420452023, -0.5093195352604465), (-1.8854890069176773, 0.039312640674143466), (-2.1493561952816798, -0.6961438706877264), (-1.6479325268131182, 2.4528824972513292), (-1.581607437091124, 1.8070015329704012), (-1.9427448645184666, 0.4057055891878049), (-2.490331339417093, -0.20540094002620926), (-1.4208434658445792, 2.271910944001683), (-1.1395921052926472, 2.308927405603024), (-1.6807163262205789, 2.9570917146713374), (-1.1762641676031333, -0.0021896619425143572), (-1.3067789710423552, -0.38699889334496507), (1.9726833383968407, -0.30318287654591486), (-2.314142014499435, 3.081881447583343), (-1.4002103435522675, 2.383653394897673), (2.121786845183188, 2.3046178029140094), (-1.9572351907810617, 0.6262451676731889), (-2.3880482013818525, 3.0400362678012596), (3.0645571782131595, -3.014847469939042), 
(-2.047470881145241, 2.6556051323334486), (-1.5891872816279002, 2.248510192192863), (-2.3481557700613576, 2.3692402213716672), (-2.2510513811735438, 2.888684166672257), (-1.395278984071984, 2.0350844235453036), (-1.5780220493300494, 1.9513866202385584), (-1.4090144139822343, -0.29853097612204826), (-1.3097944646360982, -0.7253523988141444), (-1.449973513169295, -0.1559807061138012), (0.681491736104236, 0.8181385437476878), (-2.6269380076970337, 3.014715519081167), (-1.2897124324829332, 2.5335024750787616), (-2.2806330638258805, 1.9067577517091054), (-1.3807959183056004, 2.1298796409009935), (-1.7456971273214514, -0.574987353671736), (-2.933730982409418, 2.6910646214468836), (-2.2963392587025155, 2.2143283595496337), (-2.4452213708458577, 3.1343210059176863), (2.876765477581076, 2.662336798756101), (-1.1195344530767555, 2.432725226473285), (-1.5233811672043966, 2.409844431186222), (-0.7178009905739817, 2.529883713201013), (-1.9066697908864656, -3.000815114432045), (-1.1119397690387867, -0.6425556083898356), (-1.409713600089415, -0.618923802725153), (-1.2397469461912174, -0.5285422951405299), (-1.1228364732608083, -0.7053572322877316), (-1.3885286577762594, -0.4039810349983512), (-1.3267709926389983, -0.8025725108386508), (-1.3840703255146034, -0.6678204636069326), (-1.0434671429109559, -0.585508732147898), (-1.3343131401345578, -0.3236353258271426), (-1.6040409339914838, -0.47515279750050254), (-1.6627130366646097, -0.0159390767110252), (1.2767345237416992, 0.511229940150548), (-2.1440360298866197, 1.036703153942135), (-1.551883599407677, 0.08385147658467411), (-1.4299471194887396, -0.3899764744199467), (-1.051777732450244, -0.31652904600379533), (-1.0153664332368435, -0.451619639697247), (-1.681385592670028, 0.1717891890045439), (-1.5473829214248842, 2.6721870684590208), (-0.84862098358786, -0.9504375633665949), (-1.2289297797075835, -0.73783815373075), (-1.23831793105263, -0.6225750576702481), (-1.1729609098882867, -0.6876084644886511), (-1.2451614423720387, 
-0.5229898022300739), (-1.1612080925721864, -0.7361708385962109), (-1.1673612041136612, -0.5618606347435479), (-1.5149955510536255, 0.1176556881738043), (1.0345034844294554, 0.929163527867549), (-1.777909745635943, -0.06377381256423806), (-2.0344570829349187, 2.645296101158463), (-1.3195612660689693, 2.966259923698993), (-1.5013834315124253, 2.2209138683335987), (-2.273162227966676, 2.8203596253947834), (-1.9610629930491639, 2.3990573514286804), (-2.6926898831329447, 2.973783135804107), (-3.098202463280601, 3.085559241461279), (-2.1315221949185332, 2.1424057249262325), (-1.7994790031137364, 2.2465990108366936), (-1.214694171508649, 2.1717983009213713), (-1.276321921134568, -0.7898454074280244), (-1.3230989371806148, -0.6345374097086004), (-1.0965839118262906, -0.6388930390928559), (-1.3685482333265078, -0.7347945832356337), (-1.088686662336953, -0.7778641106838695), (-1.1479251573503348, -0.41777729127089935), (-1.3309553945792616, -0.6693500459060405), (-1.5427023820918406, -0.4102930122172753), (-1.62054773508379, -0.21583869735520195), (1.2743041283609404, 0.9303655847552729), (-2.0586707384259912, 2.5132388847053324), (-1.5773210754809406, -0.08941345485651361), (-2.4194958390561108, 2.380223846328947), (-1.1570011912573015, -0.4527944421948005), (-1.088328287468529, -0.19937804982769047), (-1.565624609650636, -0.2219467125344413), (-2.0213205953595925, -0.07285762847528858), (-2.367400637179807, 2.540713438106683), (-1.7895558182081603, 1.7397397030373671), (-2.4051087212220983, -2.9626104897415484), (-1.8565741006380578, 2.288196902411354), (-2.238545704105567, 2.6596352880382073), (-0.9666815530270898, 2.5586763454967247), (-0.9361923742240769, -0.6336041112018779), (-1.2055812607858738, -0.5362175026468311), (-1.449944648236717, -0.5737026891561948), (-0.9498381426129934, -0.6990666769241073), (-1.1293694083486383, -0.9719456063582259), (-1.5213663151108219, -0.3268838222730662), (-2.0789813041673733, 2.140693339774827), (-1.5488262838734583, 
2.7122744018729996), (1.2599996304081935, 0.5357598428091993), (-1.3616407942846018, 2.3060689848557288), (1.7504285618024076, None)]

In [26]:
from small_script.rama_lib.ramachandran_calc import ramachandran
# Default figure size in inches: width 16.18033 by height 10 (a ~1.618:1 aspect ratio).
plt.rcParams.update({'figure.figsize': [16.18033, 10]})

In [27]:
# Run the imported `ramachandran` helper on the two domain PDB paths `a`
# (12asA00.pdb) and `b` (2a01C01.pdb) assigned earlier in this notebook.
# Presumably this draws a Ramachandran plot for both structures -- TODO confirm
# against small_script.rama_lib.ramachandran_calc. The recorded run of this cell
# also printed one "<file> <model> <chain> <residue> is an outlier" line per
# flagged residue.
ramachandran([a, b])


12asA00.pdb <Model id=0> <Chain id=A> ASP207 is an outlier
2a01C01.pdb <Model id=0> <Chain id=C> LYS40 is an outlier
2a01C01.pdb <Model id=0> <Chain id=C> GLU92 is an outlier
2a01C01.pdb <Model id=0> <Chain id=C> GLU139 is an outlier
2a01C01.pdb <Model id=0> <Chain id=C> LEU141 is an outlier
2a01C01.pdb <Model id=0> <Chain id=C> SER142 is an outlier

In [ ]: