Mapping Multi-Chain Ab Structure to Amino Acid Sequence


  1. Align to PDB Sequence
  2. Highlight Corresponding Positions

In [1]:
import sys
import os
from collections import OrderedDict, defaultdict
import pprint
import nglview as ngl
from Bio.PDB import *
from Bio.Seq import Seq
from Bio import pairwise2 # pairwise alignment

# Shared pretty-printer (2-space indent) for inspecting nested structures.
pretty = pprint.PrettyPrinter(indent=2)

In [2]:
# Notebook-wide settings: the directory holding structure files and the
# PDB file analysed in the cells below.
DIR = 'PDB_Struct'
PDB_FILE = '4hj0.pdb'

# Load the structure file into an NGL viewer widget. Leaving `view` as the
# last expression of the cell makes the notebook display it.
structure_path = os.path.join(DIR, PDB_FILE)
view = ngl.show_structure_file(structure_path)
view



In [3]:
# Optional download step, left disabled (PDB_ID is not defined in the
# visible cells, so it would need to be set before enabling this):
# pdbl = PDBList()
# pdbl.retrieve_pdb_file(PDB_ID, pdir=DIR)

# Parse the local PDB file into a structure object.
p = PDBParser()
parser = MMCIFParser()  # created here but not used by this cell
structure = p.get_structure('STRUCT_OBJ', os.path.join(DIR, PDB_FILE))

# Gather the residue names of every chain: one inner list per chain, in the
# order the structure yields them.
chains = structure.get_chains()
residue_list = [
    [residue.get_resname() for residue in chain.get_residues()]
    for chain in chains
]

# Number of residues in the first chain, followed by the full nested list.
print(len(residue_list[0]))
print(residue_list)


92
[['THR', 'ALA', 'GLY', 'GLU', 'LEU', 'TYR', 'GLN', 'ARG', 'TRP', 'GLU', 'ARG', 'TYR', 'ARG', 'ARG', 'GLU', 'CYS', 'GLN', 'GLU', 'THR', 'LEU', 'ALA', 'ALA', 'ALA', 'GLU', 'PRO', 'PRO', 'SER', 'GLY', 'LEU', 'ALA', 'CYS', 'ASN', 'GLY', 'SER', 'PHE', 'ASP', 'MET', 'TYR', 'VAL', 'CYS', 'TRP', 'ASP', 'TYR', 'ALA', 'ALA', 'PRO', 'ASN', 'ALA', 'THR', 'ALA', 'ARG', 'ALA', 'SER', 'CYS', 'PRO', 'TRP', 'TYR', 'LEU', 'PRO', 'TRP', 'HIS', 'HIS', 'HIS', 'VAL', 'ALA', 'ALA', 'GLY', 'PHE', 'VAL', 'LEU', 'ARG', 'GLN', 'CYS', 'GLY', 'SER', 'ASP', 'GLY', 'GLN', 'TRP', 'GLY', 'LEU', 'TRP', 'ARG', 'ASP', 'HIS', 'THR', 'GLN', 'CYS', 'GLU', 'ASN', 'PRO', 'GLU'], ['THR', 'ALA', 'GLY', 'GLU', 'LEU', 'TYR', 'GLN', 'ARG', 'TRP', 'GLU', 'ARG', 'TYR', 'ARG', 'ARG', 'GLU', 'CYS', 'GLN', 'GLU', 'THR', 'LEU', 'ALA', 'ALA', 'ALA', 'GLU', 'PRO', 'PRO', 'SER', 'GLY', 'LEU', 'ALA', 'CYS', 'ASN', 'GLY', 'SER', 'PHE', 'ASP', 'MET', 'TYR', 'VAL', 'CYS', 'TRP', 'ASP', 'TYR', 'ALA', 'ALA', 'PRO', 'ASN', 'ALA', 'THR', 'ALA', 'ARG', 'ALA', 'SER', 'CYS', 'PRO', 'TRP', 'TYR', 'LEU', 'PRO', 'TRP', 'HIS', 'HIS', 'HIS', 'VAL', 'ALA', 'ALA', 'GLY', 'PHE', 'VAL', 'LEU', 'ARG', 'GLN', 'CYS', 'GLY', 'SER', 'ASP', 'GLY', 'GLN', 'TRP', 'GLY', 'LEU', 'TRP', 'ARG', 'ASP', 'HIS', 'THR', 'GLN', 'CYS', 'GLU', 'ASN', 'PRO'], ['GLN', 'LEU', 'GLN', 'GLN', 'SER', 'GLY', 'ALA', 'GLU', 'VAL', 'LYS', 'LYS', 'PRO', 'GLY', 'SER', 'SER', 'VAL', 'LYS', 'VAL', 'SER', 'CYS', 'LYS', 'ALA', 'SER', 'GLY', 'GLY', 'THR', 'PHE', 'SER', 'SER', 'TYR', 'ALA', 'ILE', 'SER', 'TRP', 'VAL', 'ARG', 'GLN', 'ALA', 'PRO', 'GLY', 'GLN', 'GLY', 'LEU', 'GLU', 'TRP', 'MET', 'GLY', 'GLY', 'ILE', 'ILE', 'PRO', 'THR', 'PHE', 'GLY', 'THR', 'ALA', 'ASN', 'TYR', 'ALA', 'GLN', 'LYS', 'PHE', 'GLN', 'GLY', 'ARG', 'VAL', 'THR', 'ILE', 'THR', 'ALA', 'ASP', 'GLU', 'SER', 'THR', 'SER', 'THR', 'ALA', 'TYR', 'MET', 'GLU', 'LEU', 'SER', 'SER', 'LEU', 'ARG', 'SER', 'GLU', 'ASP', 'THR', 'ALA', 'VAL', 'TYR', 'TYR', 'CYS', 'ALA', 'GLN', 'GLY', 'PRO', 'ILE', 'VAL', 'GLY', 
'ALA', 'PRO', 'THR', 'ASP', 'TYR', 'TRP', 'GLY', 'LYS', 'GLY', 'THR', 'LEU', 'VAL', 'THR', 'VAL', 'SER', 'SER', 'ALA', 'SER', 'THR', 'LYS', 'GLY', 'PRO', 'SER', 'VAL', 'PHE', 'PRO', 'LEU', 'ALA', 'PRO', 'SER', 'GLY', 'THR', 'ALA', 'ALA', 'LEU', 'GLY', 'CYS', 'LEU', 'VAL', 'LYS', 'ASP', 'TYR', 'PHE', 'PRO', 'GLU', 'PRO', 'VAL', 'THR', 'VAL', 'SER', 'TRP', 'ASN', 'SER', 'GLY', 'ALA', 'LEU', 'THR', 'SER', 'GLY', 'VAL', 'HIS', 'THR', 'PHE', 'PRO', 'ALA', 'VAL', 'LEU', 'GLN', 'SER', 'SER', 'GLY', 'LEU', 'TYR', 'SER', 'LEU', 'SER', 'SER', 'VAL', 'VAL', 'THR', 'VAL', 'PRO', 'SER', 'SER', 'THR', 'GLN', 'THR', 'TYR', 'ILE', 'CYS', 'ASN', 'VAL', 'ASN', 'HIS', 'LYS', 'PRO', 'SER', 'ASN', 'THR', 'LYS', 'VAL', 'ASP', 'LYS', 'ARG', 'VAL'], ['SER', 'TYR', 'VAL', 'LEU', 'THR', 'GLN', 'PRO', 'PRO', 'SER', 'ALA', 'SER', 'GLY', 'THR', 'PRO', 'GLY', 'GLN', 'ARG', 'VAL', 'ALA', 'ILE', 'SER', 'CYS', 'SER', 'GLY', 'SER', 'ASN', 'SER', 'ASN', 'ILE', 'GLY', 'SER', 'ASN', 'THR', 'VAL', 'HIS', 'TRP', 'TYR', 'GLN', 'GLN', 'LEU', 'PRO', 'GLY', 'ALA', 'ALA', 'PRO', 'LYS', 'LEU', 'LEU', 'ILE', 'TYR', 'SER', 'ASN', 'ASN', 'GLN', 'ARG', 'PRO', 'SER', 'GLY', 'VAL', 'PRO', 'ASP', 'ARG', 'PHE', 'SER', 'GLY', 'SER', 'ASN', 'SER', 'GLY', 'THR', 'SER', 'ALA', 'SER', 'LEU', 'ALA', 'ILE', 'SER', 'ARG', 'LEU', 'GLN', 'SER', 'GLU', 'ASP', 'GLU', 'ALA', 'ASP', 'TYR', 'TYR', 'CYS', 'ALA', 'ALA', 'TRP', 'ASP', 'ASP', 'SER', 'LEU', 'ASN', 'GLY', 'VAL', 'VAL', 'PHE', 'GLY', 'GLY', 'GLY', 'THR', 'LYS', 'VAL', 'THR', 'VAL', 'LEU', 'GLN', 'PRO', 'LYS', 'ALA', 'ALA', 'PRO', 'SER', 'VAL', 'THR', 'LEU', 'PHE', 'PRO', 'PRO', 'SER', 'SER', 'GLU', 'GLU', 'LEU', 'GLN', 'ALA', 'ASN', 'LYS', 'ALA', 'THR', 'LEU', 'VAL', 'CYS', 'LEU', 'ILE', 'SER', 'ASP', 'PHE', 'TYR', 'PRO', 'GLY', 'ALA', 'VAL', 'THR', 'VAL', 'ALA', 'TRP', 'LYS', 'ALA', 'ASP', 'SER', 'SER', 'PRO', 'VAL', 'LYS', 'ALA', 'GLY', 'VAL', 'GLU', 'THR', 'THR', 'THR', 'PRO', 'SER', 'LYS', 'GLN', 'SER', 'ASN', 'ASN', 'LYS', 'TYR', 'ALA', 'ALA', 'SER', 'SER', 'TYR', 
'LEU', 'SER', 'LEU', 'THR', 'PRO', 'GLU', 'GLN', 'TRP', 'LYS', 'SER', 'HIS', 'ARG', 'SER', 'TYR', 'SER', 'CYS', 'GLN', 'VAL', 'THR', 'HIS', 'GLU', 'VAL', 'GLU', 'LYS', 'THR', 'VAL', 'ALA', 'PRO', 'THR', 'GLU'], ['SER', 'TYR', 'VAL', 'LEU', 'THR', 'GLN', 'PRO', 'PRO', 'SER', 'ALA', 'SER', 'GLY', 'THR', 'PRO', 'GLY', 'GLN', 'ARG', 'VAL', 'ALA', 'ILE', 'SER', 'CYS', 'SER', 'GLY', 'SER', 'ASN', 'SER', 'ASN', 'ILE', 'GLY', 'SER', 'ASN', 'THR', 'VAL', 'HIS', 'TRP', 'TYR', 'GLN', 'GLN', 'LEU', 'PRO', 'GLY', 'ALA', 'ALA', 'PRO', 'LYS', 'LEU', 'LEU', 'ILE', 'TYR', 'SER', 'ASN', 'ASN', 'GLN', 'ARG', 'PRO', 'SER', 'GLY', 'VAL', 'PRO', 'ASP', 'ARG', 'PHE', 'SER', 'GLY', 'SER', 'ASN', 'SER', 'GLY', 'THR', 'SER', 'ALA', 'SER', 'LEU', 'ALA', 'ILE', 'SER', 'ARG', 'LEU', 'GLN', 'SER', 'GLU', 'ASP', 'GLU', 'ALA', 'ASP', 'TYR', 'TYR', 'CYS', 'ALA', 'ALA', 'TRP', 'ASP', 'ASP', 'SER', 'LEU', 'ASN', 'GLY', 'VAL', 'VAL', 'PHE', 'GLY', 'GLY', 'GLY', 'THR', 'LYS', 'VAL', 'THR', 'VAL', 'LEU', 'GLN', 'PRO', 'LYS', 'ALA', 'ALA', 'PRO', 'SER', 'VAL', 'THR', 'LEU', 'PHE', 'PRO', 'PRO', 'SER', 'SER', 'GLU', 'GLU', 'LEU', 'GLN', 'ALA', 'ASN', 'LYS', 'ALA', 'THR', 'LEU', 'VAL', 'CYS', 'LEU', 'ILE', 'SER', 'ASP', 'PHE', 'TYR', 'PRO', 'GLY', 'ALA', 'VAL', 'THR', 'VAL', 'ALA', 'TRP', 'LYS', 'ALA', 'ASP', 'SER', 'SER', 'PRO', 'VAL', 'LYS', 'ALA', 'GLY', 'VAL', 'GLU', 'THR', 'THR', 'THR', 'PRO', 'SER', 'LYS', 'GLN', 'SER', 'ASN', 'ASN', 'LYS', 'TYR', 'ALA', 'ALA', 'SER', 'SER', 'TYR', 'LEU', 'SER', 'LEU', 'THR', 'PRO', 'GLU', 'GLN', 'TRP', 'LYS', 'SER', 'HIS', 'ARG', 'SER', 'TYR', 'SER', 'CYS', 'GLN', 'VAL', 'THR', 'HIS', 'GLU', 'GLY', 'VAL', 'GLU', 'LYS', 'THR', 'VAL', 'ALA', 'PRO', 'THR', 'GLU'], ['GLN', 'LEU', 'GLN', 'GLN', 'SER', 'GLY', 'ALA', 'GLU', 'VAL', 'LYS', 'LYS', 'PRO', 'GLY', 'SER', 'SER', 'VAL', 'LYS', 'VAL', 'SER', 'CYS', 'LYS', 'ALA', 'SER', 'GLY', 'GLY', 'THR', 'PHE', 'SER', 'SER', 'TYR', 'ALA', 'ILE', 'SER', 'TRP', 'VAL', 'ARG', 'GLN', 'ALA', 'PRO', 'GLY', 'GLN', 'GLY', 'LEU', 'GLU', 
'TRP', 'MET', 'GLY', 'GLY', 'ILE', 'ILE', 'PRO', 'THR', 'PHE', 'GLY', 'THR', 'ALA', 'ASN', 'TYR', 'ALA', 'GLN', 'LYS', 'PHE', 'GLN', 'GLY', 'ARG', 'VAL', 'THR', 'ILE', 'THR', 'ALA', 'ASP', 'GLU', 'SER', 'THR', 'SER', 'THR', 'ALA', 'TYR', 'MET', 'GLU', 'LEU', 'SER', 'SER', 'LEU', 'ARG', 'SER', 'GLU', 'ASP', 'THR', 'ALA', 'VAL', 'TYR', 'TYR', 'CYS', 'ALA', 'GLN', 'GLY', 'PRO', 'ILE', 'VAL', 'GLY', 'ALA', 'PRO', 'THR', 'ASP', 'TYR', 'TRP', 'GLY', 'LYS', 'GLY', 'THR', 'LEU', 'VAL', 'THR', 'VAL', 'SER', 'SER', 'ALA', 'SER', 'THR', 'LYS', 'GLY', 'PRO', 'SER', 'VAL', 'PHE', 'PRO', 'LEU', 'ALA', 'PRO', 'GLY', 'THR', 'ALA', 'ALA', 'LEU', 'GLY', 'CYS', 'LEU', 'VAL', 'LYS', 'ASP', 'TYR', 'PHE', 'PRO', 'GLU', 'PRO', 'VAL', 'THR', 'VAL', 'SER', 'TRP', 'ASN', 'SER', 'GLY', 'ALA', 'LEU', 'THR', 'SER', 'GLY', 'VAL', 'HIS', 'THR', 'PHE', 'PRO', 'ALA', 'VAL', 'LEU', 'GLN', 'SER', 'SER', 'GLY', 'LEU', 'TYR', 'SER', 'LEU', 'SER', 'SER', 'VAL', 'VAL', 'THR', 'VAL', 'PRO', 'SER', 'SER', 'THR', 'GLN', 'THR', 'TYR', 'ILE', 'CYS', 'ASN', 'VAL', 'ASN', 'HIS', 'LYS', 'PRO', 'SER', 'ASN', 'THR', 'LYS', 'VAL', 'ASP', 'LYS', 'ARG', 'VAL']]

In [4]:
# Extract polypeptides from the structure object and report their sequences.
#
# Produces:
#   complete_aa_seq  - one amino-acid string per polypeptide, in build order
#   chain_lengths    - length of each polypeptide (parallel to complete_aa_seq)
# NOTE(review): the lengths are per polypeptide returned by build_peptides(),
# not per chain; the recorded output lists 12 entries for the 6 chains parsed
# above, so the name `chain_lengths` is slightly misleading.

# Only polypeptides at least this long get a preview line, so that the
# 50:57 preview slice below always has residues to show.
MIN_PREVIEW_LEN = 55

ppb = PPBuilder()
complete_aa_seq = []
complete_aa_chain_map = []  # declared for later use; nothing is added to it here
chain_lengths = []
for pp in ppb.build_peptides(structure):
    seq = pp.get_sequence()
    # Convert once and work with a plain string from here on, instead of
    # rebuilding the sequence and mixing str/Seq objects in concatenation.
    seq_str = str(seq)
    complete_aa_seq.append(seq_str)
    chain_lengths.append(len(seq_str))
    if len(seq_str) >= MIN_PREVIEW_LEN:
        # Preview line: length, first residue, then the residues at 50:57.
        print('        ' + str(len(seq_str)) + ': ' + seq_str[0] + ' -> ' + seq_str[50:57])

print('=====' * 15)
print('Full PDB AA SEQ: \n\n' + ''.join(complete_aa_seq))
print(chain_lengths)


        92: T -> RASCPWY
        91: T -> RASCPWY
        131: Q -> PTFGTAN
        201: S -> SNNQRPS
        202: S -> SNNQRPS
        130: Q -> PTFGTAN
===========================================================================
Full PDB AA SEQ: 

TAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPETAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPSGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRVSYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEVEKTVAPTESYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGVEKTVAPTEQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRV
[92, 91, 131, 54, 21, 201, 9, 202, 9, 130, 54, 21]

In [5]:
# Align PDB to Reference
ref_seq = 'TAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPETAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPSGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRVSYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEVEKTVAPTESYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGVEKTVAPTEQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRV'
pdb_seq = 'GELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCW'

# Local pairwise alignment of the PDB fragment against the reference.
# localms scoring arguments: match +2, mismatch -1, gap open -0.5,
# gap extend -0.1.
align = pairwise2.align.localms(ref_seq, pdb_seq, 2, -1, -.5, -.1)
if not align:
    # Fail with a clear message instead of an IndexError on align[0].
    raise ValueError('No local alignment found between ref_seq and pdb_seq')

# Keep only the two gapped sequences of the first returned alignment.
# Slicing (rather than unpacking score/start/end into `_`) leaves IPython's
# `_` last-output variable untouched.
ref_seq_a, pdb_seq_a = align[0][:2]

print(ref_seq_a)
print('\n')
print(pdb_seq_a)

# Save both gapped sequences as two FASTA-style records separated by a
# blank line (no trailing newline after the last record).
with open('aligned_out.txt', 'w') as out_f:
    out_f.write('>ref_group_1\n{}\n\n>pdb_seq_2128-1\n{}'.format(ref_seq_a, pdb_seq_a))


TAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPETAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPSGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRVSYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEVEKTVAPTESYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGVEKTVAPTEQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRV


----------------------------------------------------------------------------------------------GELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [ ]:
# Align Clone to Reference
clone_seq_1 = 'TAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPETAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPSGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRVSYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEVEKTVAPTESYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGVEKTVAPTEQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRV'
# Local pairwise alignment of the clone sequence against the reference.
# localms scoring arguments: match +2, mismatch -1, gap open -0.5,
# gap extend -0.1.
align = pairwise2.align.localms(ref_seq, clone_seq_1, 2, -1, -.5, -.1)
if not align:
    # Fail with a clear message instead of an IndexError on align[0].
    raise ValueError('No local alignment found between ref_seq and clone_seq_1')

# Store the gapped clone under its own name so the raw `clone_seq_1` input is
# not replaced by its aligned form. Slicing also avoids binding IPython's `_`.
ref_seq_a, clone_seq_1_a = align[0][:2]

print(ref_seq_a)
print('\n')
print(clone_seq_1_a)

# NOTE(review): this is the same path the PDB-to-reference alignment cell
# writes to, so running both cells leaves only this cell's output on disk.
# Confirm whether a separate file name is wanted.
with open('aligned_out.txt', 'w') as out_f:
    out_f.write('{}\n\n{}'.format(ref_seq_a, clone_seq_1_a))

In [ ]:
# Multiple Alignment

## Create a new fasta file
fasta_div = '>'
seq_1 = 'TAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPETAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPSGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRVSYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEVEKTVAPTESYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGVEKTVAPTEQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRV'
seq_2 = 'TAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPETAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPSGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRVSYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEVEKTVAPTESYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGVEKTVAPTEQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRV'
seq_3 = 'TAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPETAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPSGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRVSYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEVEKTVAPTESYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGVEKTVAPTEQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRV'
seq_4 = 'TAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPETAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPSGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRVSYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEVEKTVAPTESYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGVEKTVAPTEQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRV'
seq_5 = 'TAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPETAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPSGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRVSYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEVEKTVAPTESYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGVEKTVAPTEQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRV'
seq_6 = 'TAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPETAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPSGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRVSYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEVEKTVAPTESYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGVEKTVAPTEQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRV'
seq_7 = 'TAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPETAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPSGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRVSYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEVEKTVAPTESYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGVEKTVAPTEQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRV'

import json

from Bio import AlignIO
from Bio.Align.Applications import ClustalwCommandline

# ClustalW executable. Defaults to the original Homebrew location; set the
# CLUSTALW_EXE environment variable to use a different install.
CLUSTALW_EXE = os.environ.get('CLUSTALW_EXE',
                              '/usr/local/Cellar/clustal-w/2.1/bin/clustalw2')
FASTA_IN = 'create.fasta'

# Numbering scheme for the map keys: residues are labelled 1..FIRST_CHAIN_LEN
# on FIRST_CHAIN, after which numbering restarts at 1 on SECOND_CHAIN.
# NOTE(review): presumably 107 is the number of residues on the 'L' chain
# segment of interest - verify against the structure.
FIRST_CHAIN = 'L'
SECOND_CHAIN = 'H'
FIRST_CHAIN_LEN = 107

# Sequences to align, keyed by the identifier used as the FASTA header.
seqs = OrderedDict([
        ('pdb_seq', pdb_seq),
        ('ref_seq', ref_seq),
        ('2130', seq_1),
        ('2131', seq_2),
        ('2133', seq_3),
        ('2699', seq_4),
        ('2701', seq_5),
        ('2703', seq_6),
        ('2704', seq_7)])

seqs_keys = seqs.keys()

# Build the FASTA input file: one header line and one sequence line per entry.
with open(FASTA_IN, 'w') as create_fasta:
    for seq_id, raw_seq in seqs.items():
        # str() keeps this working when a value is a Seq object rather than a
        # plain string (the local-alignment cell rebinds ref_seq/pdb_seq to Seq).
        create_fasta.write('{}{}\n{}\n'.format(fasta_div, seq_id, str(raw_seq)))

## Multiple-align the file with ClustalW
clustal_out = 'clustal_out.fasta'
clustalw_cline = ClustalwCommandline(CLUSTALW_EXE,
                                     infile=FASTA_IN, outfile=clustal_out)
print(clustalw_cline)
stdout, stderr = clustalw_cline()
# print(stdout, '\n', stderr)

## Read the aligned sequences back.
# The file is parsed as Clustal format; no output format is requested on the
# command line, so the '.fasta' suffix is presumably just a file name.
align = AlignIO.read(clustal_out, "clustal")

# Replace each raw sequence with its gapped (aligned) form, matched by id.
for record in align:
    seqs[record.id] = str(record.seq)

req_seq_aligned = seqs['ref_seq']
pdb_seq_aligned = seqs['pdb_seq']

# If a record id did not match its key, the entry above is still the raw,
# ungapped input; rows of one alignment must have equal length, so catch that
# here instead of producing a wrong map or an IndexError below.
if len(req_seq_aligned) != len(pdb_seq_aligned):
    raise ValueError(
        'Aligned ref_seq ({}) and pdb_seq ({}) differ in length; '
        'check that ClustalW preserved the sequence ids'.format(
            len(req_seq_aligned), len(pdb_seq_aligned)))

# Map every non-gap PDB residue to the reference residue in the same column:
#   '<residue number>.<chain>' -> [pdb residue, ref residue, 1-based column]
map_pdb_to_ref = OrderedDict()
pdb_counter = 1
chain_name = FIRST_CHAIN
for i, aa in enumerate(pdb_seq_aligned):
    if aa == '-':
        continue  # gap in the PDB row: no structure residue at this column
    key = '{}.{}'.format(pdb_counter, chain_name)
    map_pdb_to_ref[key] = [aa, req_seq_aligned[i], i + 1]
    if pdb_counter == FIRST_CHAIN_LEN and chain_name == FIRST_CHAIN:
        # Last residue of the first chain: restart numbering on the second
        # chain (0 here becomes 1 after the increment below).
        pdb_counter = 0
        chain_name = SECOND_CHAIN
    pdb_counter += 1

# print() as a function call works on both Python 2 and Python 3; the
# original print statement is a SyntaxError on Python 3.
print(json.dumps(map_pdb_to_ref, indent=4))

In [ ]:
# Local Alignment
from Bio import pairwise2
from Bio.SubsMat.MatrixInfo import blosum62
from Bio.Seq import Seq

seq1 = 'TAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPETAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPSGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRVSYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEVEKTVAPTESYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGVEKTVAPTEQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRV'
seq2 = 'ATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPETAGELYQRWERYRRECQETLAAAEPPSGLACNGSFDMYVCWDYAAPNATARASCPWYLPWHHHVAAGFVLRQCGSDGQWGLWRDHTQCENPQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKGTLVTVSSASTKGPSVFPLAPSGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSSGLYSLSSVVTVPSSTQTYICNVNHKPSNTKVDKRVSYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEVEKTVAPTESYVLTQPPSASGTPGQRVAISCSGSNSNIGSNTVHWYQQLPGAAPKLLIYSNNQRPSGVPDRFSGSNSGTSASLAISRLQSEDEADYYCAAWDDSLNGVVFGGGTKVTVLQPKAAPSVTLFPPSSEELQANKATLVCLISDFYPGAVTVAWKADSSPVKAGVETTTPSKQSNNKYAASSYLSLTPEQWKSHRSYSCQVTHEGVEKTVAPTEQLQQSGAEVKKPGSSVKVSCKASGGTFSSYAISWVRQAPGQGLEWMGGIIPTFGTANYAQKFQGRVTITADESTSTAYMELSSLRSEDTAVYYCAQGPIVGAPTDYWGKG'

# Strip any gap characters and wrap the raw strings as Seq objects.
# NOTE(review): this rebinds the notebook-level `ref_seq` / `pdb_seq` names,
# which are plain strings in the earlier alignment cells, to Seq objects
# built from seq1 / seq2.
ref_seq, pdb_seq = [Seq(raw.replace("-", "")) for raw in (seq1, seq2)]

# Global alignment scored with the BLOSUM62 matrix; gap open -10, gap
# extend -0.5. Only the first returned alignment is printed.
alignments = pairwise2.align.globalds(ref_seq, pdb_seq, blosum62, -10, -0.5)
first_alignment = alignments[0]
print(pairwise2.format_alignment(*first_alignment))