In [126]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions

%matplotlib inline
plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2

import sys
sys.path.insert(0, '/Users/weilu/openmmawsem/')

from openmmawsem import *


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [2]:
os.chdir("/Users/weilu/openmmawsem/_local/quick_run_nov16/")

In [3]:
def download(pdb_id):
    if not os.path.isfile(f"{pdb_id}.pdb"):
        PDBList().retrieve_pdb_file(pdb_id.lower(), pdir='.', file_format='pdb')
        os.rename("pdb%s.ent" % pdb_id, f"{pdb_id}.pdb")

In [4]:
simulation_platform = "OpenCL" # OpenCL, CUDA, CPU, or Reference
pdb_id = '2xov'
pdb = f"{pdb_id}.pdb"
chain='A'

download(pdb_id)
os.system("cp /Users/weilu/opt/parameters/globular_parameters/burial_gamma.dat .")
os.system("cp /Users/weilu/opt/parameters/globular_parameters/gamma.dat .")
input_pdb_filename, cleaned_pdb_filename = prepare_pdb(pdb, chain)
ensure_atom_order(input_pdb_filename)
getSeqFromCleanPdb(input_pdb_filename, chains='A')


Reorder atom position
Original, Changed to
1042 N ,  1042 N
1043 H ,  1043 H
1044 C ,  1046 CA
1045 O ,  1044 C
1046 CA ,  1045 O
1047 CB ,  1047 CB
Out[4]:
'ERAGPVTWVMMIACVVVFIAMQILGDQEVMLWLAWPFDPTLKFEFWRYFTHALMHFSLMHILFNLLWWWYLGGAVEKRLGSGKLIVITLISALLSGYVQQKFSGPWFGGLSGVVYALMGYVWLRGERDPQSGIYLQRGLIIFALIWIVAGWFDLFGMSMANGAHIAGLAVGLAMAFVDSLN'

In [92]:
# setup system
os.system("cp ~/opt/database/cullpdb_pc80_* .")
os.system("python2 ~/opt/script/MultCha_prepFrags_index.py \
    cullpdb_pc80_res3.0_R1.0_d160504_chains29712 %s.fasta 20 1 9 > logfile" % pdb_id)


Out[92]:
0

In [125]:
reporter_frequency = 1000
oa = OpenMMAWSEMSystem(input_pdb_filename, k_awsem=1.0, xml_filename="../../awsem.xml") # k_awsem is an overall scaling factor that will affect the relevant temperature scales

# apply forces
forces = [
    oa.con_term(),
    oa.chain_term(),
    oa.chi_term(),
    oa.excl_term(),
    oa.rama_term(),
    oa.rama_proline_term(),
    oa.direct_term(),
    oa.burial_term(),
    oa.mediated_term(),
    oa.fragment_memory_term(frag_location_pre="./")
]
oa.addForces(forces)

# start simulation
collision_rate = 5.0 / picoseconds

integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 2*femtoseconds)
simulation = Simulation(oa.pdb.topology, oa.system, integrator, Platform.getPlatformByName(simulation_platform))
simulation.context.setPositions(oa.pdb.positions) # set the initial positions of the atoms
# simulation.context.setVelocitiesToTemperature(300*kelvin) # set the initial velocities of the atoms according to the desired starting temperature
simulation.minimizeEnergy() # first, minimize the energy to a local minimum to reduce any large forces that might be present
simulation.reporters.append(StateDataReporter(stdout, reporter_frequency, step=True, potentialEnergy=True, temperature=True)) # output energy and temperature during simulation
simulation.reporters.append(PDBReporter("movie.pdb", reporter_frequency)) # output PDBs of simulated structures
simulation.step(int(1e4))
# simulation.reporters.append(CheckpointReporter(checkpoint_file, checkpoint_reporter_frequency)) # save progress during the simulation


Reading Fragment table. from ./frag_table.npy.
#"Step","Potential Energy (kJ/mole)","Temperature (K)"
1000,-4439.0380859375,276.7474416253649
2000,-4391.15234375,299.1971234907612
3000,-4686.6181640625,303.72159831439734
4000,-4656.96044921875,305.3432530305844
5000,-4692.427734375,292.81708005238204
6000,-4761.015625,284.509277566068
7000,-4744.38623046875,294.06852027668566
8000,-4787.18115234375,298.5820618815759
9000,-4800.2548828125,295.5694725023697
10000,-4942.6005859375,287.2937044496808

In [15]:
# when simulation is complete, compute order parameters and energies
oa = OpenMMAWSEMSystem(input_pdb_filename, k_awsem=1.0, xml_filename="../../awsem.xml") # k_awsem is an overall scaling factor that will affect the relevant q scales
order_parameters_to_compute = {
    "Qvalue": oa.q_value(cleaned_pdb_filename, chain, min_seq_sep=3, max_seq_sep=np.inf), 
#       "Con": oa.con_term(k_con=k_con),
#     "Chain": oa.chain_term(k_chain=k_chain),
#     "Chi": oa.chi_term(k_chi=k_chi),
#     "Excl": oa.excl_term(k_excl=k_excl),
#     "Rama": oa.rama_term(k_rama=k_rama),
#     "RamaPro": oa.rama_proline_term(k_rama_proline=k_rama_proline),
#     "ddAM": oa.density_dependent_associative_memory_term(memories, k_am_dd=k_am_dd, am_dd_min_seq_sep=am_dd_min_seq_sep, am_dd_max_seq_sep=am_dd_max_seq_sep, density_alpha=density_alpha, density_normalization=density_normalization, rho0=rho0, density_min_seq_sep=density_min_seq_sep, am_well_width=am_well_width, density_only_from_native_contacts=density_only_from_native_contacts, density_pdb_file=density_pdb_file, density_chain_name=density_chain_name, density_native_contact_min_seq_sep=density_native_contact_min_seq_sep, density_native_contact_threshold=density_native_contact_threshold),
#     "AMHGo": oa.additive_amhgo_term(cleaned_pdb_filename, amhgo_chain, k_amhgo=k_amhgo, amhgo_min_seq_sep=amhgo_min_seq_sep, amhgo_contact_threshold=amhgo_contact_threshold, amhgo_well_width=amhgo_well_width),
}

order_parameters, mdtraj_order_parameters = compute_order_parameters(input_pdb_filename, "movie.pdb", order_parameters_to_compute.values(), xml_filename="../../awsem.xml")
order_parameter_names = list(order_parameters_to_compute.keys())


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-15-7b21459014fb> in <module>()
     13 }
     14 
---> 15 order_parameters, mdtraj_order_parameters = compute_order_parameters(input_pdb_filename, "movie.pdb", order_parameters_to_compute.values(), xml_filename="../../awsem.xml")
     16 order_parameter_names = list(order_parameters_to_compute.keys())

~/openmmawsem/openmmawsem.py in compute_order_parameters(openmm_awsem_pdb_file, pdb_trajectory_filename, order_parameters, platform_name, k_awsem, compute_mdtraj, rmsd_reference_structure, compute_total_energy, energy_columns, xml_filename)
   1145         order_parameter_values.append([])
   1146         for i in range(len(pdb_trajectory)):
-> 1147             order_parameter_values[-1].append(np.sum([order_parameter_values[x-1][i] for x in energy_columns]))
   1148     if not compute_mdtraj:
   1149         return np.array(order_parameter_values)

TypeError: 'NoneType' object is not iterable

In [117]:
pdb_trajectory = read_trajectory_pdb_positions("movie.pdb")
oa = OpenMMAWSEMSystem(input_pdb_filename, k_awsem=1.0, xml_filename="../../awsem.xml") # k_awsem is an overall scaling factor that will affect the relevant temperature scales

# apply forces
forces = [
    oa.con_term(),
    oa.chain_term(),
#     oa.chi_term(),
#     oa.excl_term(),
#     oa.rama_term(),
#     oa.rama_proline_term(),
#     oa.direct_term(),
#     oa.burial_term(),
#     oa.mediated_term(),
#     oa.fragment_memory_term(frag_location_pre="./")
]
oa.addForces(forces)

# start simulation
collision_rate = 5.0 / picoseconds

integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 2*femtoseconds)
simulation = Simulation(oa.pdb.topology, oa.system, integrator, Platform.getPlatformByName(simulation_platform))

In [119]:
for pdb in pdb_trajectory:
    simulation.context.setPositions(pdb.positions)
    state = simulation.context.getState(getEnergy=True, groups={2})
    print(state.getPotentialEnergy().value_in_unit(kilojoule_per_mole))


550.2899169921875
498.27032470703125
576.870361328125
582.57080078125
596.3624267578125
553.7578735351562
560.9185791015625
508.0758361816406
549.3898315429688
552.7684326171875

In [116]:
for pdb in pdb_trajectory:
    simulation.context.setPositions(pdb.positions)
    state = simulation.context.getState(getEnergy=True, groups={2})
    print(state.getPotentialEnergy().value_in_unit(kilojoule_per_mole))


550.2899169921875
498.27032470703125
576.870361328125
582.57080078125
596.3624267578125
553.7578735351562
560.9185791015625
508.0758361816406
549.3898315429688
552.7684326171875

In [81]:
60&(1<<5)


Out[81]:
32

In [39]:
1<<3


Out[39]:
8

In [51]:
for pdb in pdb_trajectory:
    simulation.context.setPositions(pdb.positions)
    state = simulation.context.getState(getEnergy=True)
    print(state.getPotentialEnergy().value_in_unit(kilojoule_per_mole))


776.7666015625
773.289794921875
825.0230102539062
753.042236328125
896.6956787109375
829.68505859375
746.64794921875
816.7725830078125
824.6434326171875
772.7034912109375

In [26]:
for pdb in pdb_trajectory:
    simulation.context.setPositions(pdb.positions)
    state = simulation.context.getState(getEnergy=True, groups=10)
    print(state.getPotentialEnergy().value_in_unit(kilojoule_per_mole))


932.834228515625
936.1502685546875
959.645263671875
868.6181640625
1099.9014892578125
982.6143188476562
932.326416015625
981.8992309570312
1006.880615234375
943.318115234375

In [ ]:


In [ ]:
pdb = PDBFile('input.pdb')
forcefield = ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
system = forcefield.createSystem(pdb.topology, nonbondedMethod=PME, nonbondedCutoff=1*nanometer, constraints=HBonds)
integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 0.002*picoseconds)
simulation = Simulation(pdb.topology, system, integrator)
simulation.context.setPositions(pdb.positions)
simulation.minimizeEnergy()
simulation.reporters.append(PDBReporter('output.pdb', 1000))
simulation.reporters.append(StateDataReporter(stdout, 1000, step=True, potentialEnergy=True, temperature=True))
simulation.step(10000)

In [ ]:


In [10]:
os.system("rm frag_table.npy")


Out[10]:
0

In [ ]:
Reading Fragment table. from ./frag_table.npy.
#"Step","Potential Energy (kJ/mole)","Temperature (K)"
1000,-4510.03955078125,289.49342994803334
2000,-4486.85791015625,286.18904447834353
3000,-4600.8193359375,299.4729686118679
4000,-4694.6376953125,318.32629703025185
5000,-4604.8271484375,308.2551570043043
6000,-4745.29638671875,295.0853847360626
7000,-4794.03515625,311.2622238551447
8000,-4851.67919921875,300.7594704431106
9000,-4852.998046875,296.65048061625595
10000,-4909.064453125,302.2931345473861

In [ ]:
def ensure_atom_order(input_pdb_filename):
    # ensure order of ['n', 'h', 'ca', 'c', 'o', 'cb']
    # to be more specific, this ensure 'ca' always show up before 'c'.
    def first(t):
        return t[0]
    order_table = {'N':0, 'H':1, 'CA':2, 'C':3, 'O':4, 'CB':5}
    one_residue = []
    with open("tmp.pdb", "w") as out:
        with open(input_pdb_filename, "r") as f:
            pre = "CB"
            all_lines = f.readlines()
    #         print(all_lines)
            pre_res_id = all_lines[0].split()[5]
            for line in all_lines:
                info = line.split()
                if info[0]!="ATOM":
                    sorted_residue = sorted(one_residue, key=first)
                    for a in sorted_residue:
                        out.write(a[1])
                    one_residue = []
                    out.write(line)
                    continue
                res_id = info[5]
                if res_id != pre_res_id:
                    pre_res_id = res_id
                    # sort them in order
                    sorted_residue = sorted(one_residue, key=first)
                    for a in sorted_residue:
                        out.write(a[1])
                    if sorted_residue != one_residue:
                        print("Reorder atom position")
                        print("Original, Changed to")
                        for i, t in zip(one_residue, sorted_residue):
                            print(i[2], i[3], ", ", t[2], t[3])
                    one_residue = []
                atomType = info[2]
                one_residue.append((order_table[atomType], line, info[1], atomType))
    os.system(f"mv tmp.pdb {input_pdb_filename}")

In [ ]:
ThreeToOne = {'ALA':'A','ARG':'R','ASN':'N','ASP':'D','CYS':'C','GLU':'E','GLN':'Q','GLY':'G','HIS':'H',
           'ILE':'I','LEU':'L','LYS':'K','MET':'M','PHE':'F','PRO':'P','SER':'S','THR':'T','TRP':'W',
           'TYR':'Y','VAL':'V'}
a = pd.read_table(cleaned_pdb_filename, skiprows=2, sep="\s+", names=["ATOM", "i", "Type", "Res", "Chain", "ResId", "x", "y", "z", "_", "_1", "_2"]).dropna()
chain_list = [i for i in chain]
threeLetterSeq = a.query(f"Chain in @chain_list and Type == 'CA'")["Res"]
seq = "".join([ThreeToOne[i] for i in threeLetterSeq])

In [ ]:


In [1]:
superStrong = [0, 3, 1, 0, 0, 0, 2, 0, 1, 1, 0, 1, 0, 2, 2, 1, 3, 0, 3, 2, 2, 3, 1, 1, 2, 1, 1, 2, 3, 1, 0, 2, 2, 1, 2, 2, 3, 0, 3, 0, 2, 3, 1, 2, 3, 0, 1, 3, 1, 3]
# id 237
smartGuy =    [0, 3, 1, 0, 0, 0, 0, 0, 1, 2, 2, 3, 2, 2, 0, 3, 0, 1, 3, 3, 0, 2, 3, 1, 2, 1, 3, 1, 1, 2, 3, 1, 3, 1, 2, 0, 1, 3, 3, 3, 0, 1, 2, 1, 1, 3, 1, 3, 3, 2]
# id 250
notBad =      [0, 3, 1, 0, 0, 1, 0, 0, 1, 3, 2, 1, 2, 2, 2, 3, 1, 0, 3, 3, 2, 2, 3, 1, 2, 1, 0, 3, 2, 2, 1, 3, 3, 2, 1, 1, 3, 3, 1, 0, 0, 1, 3, 3, 2, 0, 1, 1, 2, 0]
# id 318
id318   =     [0, 3, 1, 0, 3, 1, 0, 0, 0, 0, 1, 1, 2, 2, 2, 1, 3, 0, 3, 2, 3, 2, 1, 1, 2, 1, 1, 3, 2, 2, 1, 1, 2, 3, 1, 3, 2, 1, 3, 0, 1, 1, 0, 3, 0, 1, 2, 1, 0, 1]
# id 64
id64 =        [0, 1, 3, 0, 1, 2, 0, 0, 1, 3, 0, 0, 3, 3, 2, 0, 0, 3, 2, 3, 3, 3, 3, 3, 2, 3, 1, 1, 0, 0, 0, 2, 1, 1, 2, 3, 0, 2, 0, 2, 3, 1, 0, 2, 3, 1, 2, 1, 0, 2]

In [2]:
def string_to_list(a):
    return [int(i) for i in a.strip()]
def list_to_string(a):
    g = ""
    for i in a:
        g += str(i)
    return g

In [9]:
list_to_string([1]*50)


Out[9]:
'11111111111111111111111111111111111111111111111111'

In [10]:
ones = [1]*50
threes = [3]*50

In [11]:
superStrong = string_to_list("03100020110102213032231121123102212230302312301313")
smartGuy =    string_to_list("03100000122322030133023121311231312013330121131332")
notBad =      string_to_list("03100100132122231033223121032213321133100133201120")
id318 = string_to_list("03103100001122213032321121132211231321301103012101")
id64 = string_to_list("01301200130033200323333323110002112302023102312102")
id0 = string_to_list("03100130032320203233231121021302222102023133132331")
id1 = string_to_list("13030000331200023313330331232132222221133232231230")
id2 = string_to_list("00020000110022023002322323321331321132231110320113")
id3 = string_to_list("03120230112122003321333120112322232232102302032011")
id4 = string_to_list("30320000131030223331231112222123323213312100331123")
id5 = string_to_list("30020001310102103133111111222211232132300113313313")
id6 = string_to_list("00000110033300201310323333233331220233110131203211")

r3_1 = string_to_list("00130003101121023033321133211212212321030133121131")
r3_2 = string_to_list("01300120111033103333123323200123222333103302322300")
r3_3 = string_to_list("00020300111320213310122121213233122223331112012311")
r3_4 = string_to_list("00120031113022221001221321311312322322310330020113")
r3_5 = string_to_list("03100300100022223333211122100122332112323132111102")
r3_6 = string_to_list("00020003110003010313111123133123123122301303111231")
r3_7 = string_to_list("03100000010222103313233121022011012122220131121131")
r3_8 = string_to_list("00020321101222120333301321123333221321301303110132")
r3_9 = string_to_list("30000001131022223110113111323122211121213122231320")
r3_10 = string_to_list("00110010101122023233133323123102222333133013332010")

# enemy_dic = {
#     "Ones": ones,
#     "Threes": threes
# }
enemy_dic = {
    "superStrong": superStrong,
    "smartGuy": smartGuy,
    "notBad": notBad,
    "id318": id318,
    "id64": id64,
    "Ones": ones,
    "Threes": threes,
    "id0": id0,
    "id1": id1,
    "id2": id2,
    "id3": id3,
    "id4": id4,
    "id5": id5,
    "id6": id6,
    "r3_1": r3_1,
    "r3_2": r3_2,
    "r3_3": r3_3,
    "r3_4": r3_4,
    "r3_5": r3_5,
    "r3_6": r3_6,
    "r3_7": r3_7,
    "r3_8": r3_8,
    "r3_9": r3_9,
    "r3_10": r3_10,

}

In [22]:
print("i,name,gene")
c = 1
for i in enemy_dic:
    print(c, i, list_to_string(enemy_dic[i]), sep=",")
    c +=1


i,name,gene
1,superStrong,03100020110102213032231121123102212230302312301313
2,smartGuy,03100000122322030133023121311231312013330121131332
3,notBad,03100100132122231033223121032213321133100133201120
4,id318,03103100001122213032321121132211231321301103012101
5,id64,01301200130033200323333323110002112302023102312102
6,Ones,11111111111111111111111111111111111111111111111111
7,Threes,33333333333333333333333333333333333333333333333333
8,id0,03100130032320203233231121021302222102023133132331
9,id1,13030000331200023313330331232132222221133232231230
10,id2,00020000110022023002322323321331321132231110320113
11,id3,03120230112122003321333120112322232232102302032011
12,id4,30320000131030223331231112222123323213312100331123
13,id5,30020001310102103133111111222211232132300113313313
14,id6,00000110033300201310323333233331220233110131203211
15,r3_1,00130003101121023033321133211212212321030133121131
16,r3_2,01300120111033103333123323200123222333103302322300
17,r3_3,00020300111320213310122121213233122223331112012311
18,r3_4,00120031113022221001221321311312322322310330020113
19,r3_5,03100300100022223333211122100122332112323132111102
20,r3_6,00020003110003010313111123133123123122301303111231
21,r3_7,03100000010222103313233121022011012122220131121131
22,r3_8,00020321101222120333301321123333221321301303110132
23,r3_9,30000001131022223110113111323122211121213122231320
24,r3_10,00110010101122023233133323123102222333133013332010

In [ ]: