notebook.community

Edit and run



In [2]:

    
from pyCodeLib import *
import warnings
import glob
import re
import numpy as np
import pandas as pd
from Bio.PDB.Polypeptide import one_to_three

warnings.filterwarnings('ignore')


# sys.path.insert(0, MYHOME)
%load_ext autoreload
%autoreload 2



In [ ]:



In [337]:

    
pre = "/Users/weilu/Research/server/april_2019/optimization_with_frag_iter2/gammas/"
location=pre + "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma = np.loadtxt(location)
plot_contact_well(gamma[:210], inferBound=True, invert_sign=False)



In [328]:

    
gamma = np.loadtxt("/Users/weilu/Research/server/april_2019/optimization_with_frag_iter1/gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered")

plot_contact_well(gamma[:210], inferBound=True, invert_sign=False)



In [250]:

    
gamma = np.loadtxt("/Users/weilu/Research/server/april_2019/complete_gammas/saved_gammas/optimization_restart_iter1/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered")
plot_contact_well(gamma[:210], inferBound=True, invert_sign=False)



In [234]:

    
plot_contact_well(gamma[210:420], inferBound=True, invert_sign=False)

actually this part is all about alpha beta protein. not what I want.



In [108]:

    
with open("/Users/weilu/Research/database/queriedPDB.dat") as f:
    a = f.readlines()
a = [i.strip() for i in a]
pdb_list = a[1:-1]



In [109]:

    
len(pdb_list)









    Out[109]:





456



In [114]:

    
from Bio.PDB.PDBParser import PDBParser
data_raw = []
for protein in pdb_list:
    pre = "/Users/weilu/Research/server/april_2019/setup_test_set/cleaned_pdbs/" + protein.lower()
    name = protein
    problematic = 0
    pdbFileLocation = pre + ".pdb"
    structure = PDBParser().get_structure(name, pdbFileLocation)
    seq = ""
    for r in structure.get_residues():
        _, _, chain, (_, resId, _) = r.get_full_id()
        try:
            resName = three_to_one(r.get_resname())
        except:
            problematic = 2
        # assert chain == "A"
#         if chain != chainName:
#             print(i, name, length, len(seq1), chain, chainName)
#             problematic = 3
        seq += resName
    length = len(seq)
    data_raw.append([name, length, seq, problematic])



In [133]:

    
data = pd.DataFrame(data_raw, columns=["Name", "Length", "Seq", "Problematic"]).drop("Problematic", axis=1)



In [127]:

    
data.sort_values("Length").reset_index(drop=True).to_csv("/Users/weilu/Research/library/test_set_info.csv")



In [130]:

    
data = pd.read_csv("/Users/weilu/Research/library/test_set_info.csv", index_col=0)



In [125]:

    
data.sort_values("Length").hist("Length", bins=50)









    Out[125]:





array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a207db470>]],
      dtype=object)



In [ ]:



In [ ]:



In [3]:

    
os.chdir("/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter7/")



In [ ]:

    
"/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter7/gammas/"



In [6]:

    
dataset = {"old":("1R69, 1UTG, 3ICB, 256BA, 4CPV, 1CCR, 2MHR, 1MBA, 2FHA".split(", "), 40),
            "new":("1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", "), 80),
            "test":(['1A2J', '1A3H', '1A5Y', '1A8Q', '1AGY', '1AKZ', '1AUZ', '1B1A', '1B31', '1B8X', '1BCO', '1BN6', '1BOH', '1BOI'], 40)}

new_simulation_list = ["bias_2","bias_old_gamma", "iter1_with_bias_96percent", "new_iter2_10", "new_iter1_90", "new_iter2_8", "old_new_iter2_8"]
old_protein_simulation_list = ["iter6_30", "iter5_30", "single", "new_iter3_10", "iter4_30", "iter4_6", "iter4_13"]
simulation_location_list_dic = {}
for p in dataset["new"][0]:
    name = p.lower()[:4]
    simulation_location_list_dic[name] = new_simulation_list
for p in dataset["old"][0]:
    name = p.lower()[:4]
    simulation_location_list_dic[name] = old_protein_simulation_list



In [39]:

    
dataset = {"old":("1R69, 1UTG, 3ICB, 256BA, 4CPV, 1CCR, 2MHR, 1MBA, 2FHA".split(", "), 40),
            "new":("1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", "), 80),
            "test":(['1A2J', '1A3H', '1A5Y', '1A8Q', '1AGY', '1AKZ', '1AUZ', '1B1A', '1B31', '1B8X', '1BCO', '1BN6', '1BOH', '1BOI'], 40)}

new_simulation_list = ["new_iter2_8","bias_old_gamma"]
old_protein_simulation_list = ["iter6_30", "iter5_30"]
simulation_location_list_dic = {}
for p in dataset["new"][0]:
    name = p.lower()[:4]
    simulation_location_list_dic[name] = new_simulation_list
for p in dataset["old"][0]:
    name = p.lower()[:4]
    simulation_location_list_dic[name] = old_protein_simulation_list



In [60]:

    
gamma_file_name = "iter7_thrid_gen"
gamma_file_name = "gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma_file_name = "test"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
    "phi_list.txt", "proteins_name_list.txt", gamma_file_name, 
    "lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)









    



0 (10.888644803657137+0j)



In [56]:

    
pre_i = 7
address = f"/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter{pre_i}/iter{pre_i}_thrid_gen"
pre_iter_gamma = np.loadtxt(address)



In [57]:

    
np.std(pre_iter_gamma)









    Out[57]:





1.1584179955828737



In [82]:

    
address = f"/Users/weilu/Research/server/april_2019/optimization_mult_seq/gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma = np.loadtxt(address)
np.std(gamma)









    Out[82]:





0.046736615865453135



In [223]:

    
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/iter0_gamma"
gamma = np.loadtxt(address)
np.std(gamma)









    Out[223]:





0.6487425261519498



In [225]:

    
iter0_normalized = gamma * np.std(original_gamma) / np.std(gamma)



In [226]:

    
np.std(iter0_normalized)









    Out[226]:





0.43622242366014413



In [227]:

    
np.savetxt("/Users/weilu/Research/server/april_2019/complete_gammas/iter0_normalized_gamma", iter0_normalized)



In [107]:

    
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/iter4_gamma"
gamma = np.loadtxt(address)
np.std(gamma)









    Out[107]:





1.1584179955828737



In [86]:

    
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/iter7_gamma"
iter7 = np.loadtxt(address)
np.std(iter7)









    Out[86]:





1.1584179955828737



In [224]:

    
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/original_gamma.dat"
original_gamma = np.loadtxt(address)
np.std(original_gamma)









    Out[224]:





0.4362224236601442



In [90]:

    
np.mean(original_gamma)









    Out[90]:





-0.11829680821739127



In [91]:

    
np.mean(iter7)









    Out[91]:





0.10688544456383424



In [95]:

    
len(original_gamma)









    Out[95]:





690



In [102]:

    
iter7_normalized = iter7 * np.std(original_gamma) / np.std(iter7)



In [106]:

    
plt.hist(iter7_normalized.flatten(), bins=100, range=(-3,3))
a = plt.hist(original_gamma.flatten(), bins=100, range=(-3,3), alpha=0.3)



In [101]:

    
plt.hist(iter7.flatten(), bins=100, range=(-8,8))
plt.hist(original_gamma.flatten(), bins=100, range=(-8,8), alpha=0.3)









    Out[101]:





(array([  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,  26.,
         39.,  35.,  37.,  65.,  72., 124., 141.,  53.,  37.,  32.,  18.,
          7.,   4.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.]),
 array([-8.  , -7.84, -7.68, -7.52, -7.36, -7.2 , -7.04, -6.88, -6.72,
        -6.56, -6.4 , -6.24, -6.08, -5.92, -5.76, -5.6 , -5.44, -5.28,
        -5.12, -4.96, -4.8 , -4.64, -4.48, -4.32, -4.16, -4.  , -3.84,
        -3.68, -3.52, -3.36, -3.2 , -3.04, -2.88, -2.72, -2.56, -2.4 ,
        -2.24, -2.08, -1.92, -1.76, -1.6 , -1.44, -1.28, -1.12, -0.96,
        -0.8 , -0.64, -0.48, -0.32, -0.16,  0.  ,  0.16,  0.32,  0.48,
         0.64,  0.8 ,  0.96,  1.12,  1.28,  1.44,  1.6 ,  1.76,  1.92,
         2.08,  2.24,  2.4 ,  2.56,  2.72,  2.88,  3.04,  3.2 ,  3.36,
         3.52,  3.68,  3.84,  4.  ,  4.16,  4.32,  4.48,  4.64,  4.8 ,
         4.96,  5.12,  5.28,  5.44,  5.6 ,  5.76,  5.92,  6.08,  6.24,
         6.4 ,  6.56,  6.72,  6.88,  7.04,  7.2 ,  7.36,  7.52,  7.68,
         7.84,  8.  ]),
 <a list of 100 Patch objects>)



In [83]:

    
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/iter0_gamma"
gamma = np.loadtxt(address)
np.std(gamma)









    Out[83]:





0.6487425261519498



In [80]:

    
pre_i = 7
address = f"/Users/weilu/Research/server/april_2019/optimization_mult_seq/gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma = np.loadtxt(address)
np.std(gamma)
multiSeq = gamma * np.std(original_gamma) /np.std(gamma)



In [81]:

    
address = "/Users/weilu/Research/server/april_2019/fix_gamma_mix_error/multiSeq"
np.savetxt(address, multiSeq)



In [ ]:



In [73]:

    
address = "/Volumes/Wei_backup/feb_2019/original_gamma.dat"
original_gamma = np.loadtxt(address)
np.std(original_gamma)









    Out[73]:





0.4362224236601442



In [74]:

    
address = "/Users/weilu/Research/server/april_2019/fix_gamma_mix_error/t_7_2"
gamma = np.loadtxt(address)
np.std(gamma)









    Out[74]:





1.1584179955828737



In [76]:

    
normalized = gamma * np.std(original_gamma) /np.std(gamma)



In [77]:

    
address = "/Users/weilu/Research/server/april_2019/fix_gamma_mix_error/t_7_normalized"
np.savetxt(address, normalized)



In [59]:

    
np.savetxt("test", gamma/np.std(gamma)*np.std(pre_iter_gamma))



In [58]:

    
np.std(gamma/np.std(gamma)*np.std(pre_iter_gamma))









    Out[58]:





1.1584179955828737



In [54]:

    
np.std(gamma)









    Out[54]:





4.486000166595644



In [ ]:



In [ ]:

    
address = f"/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter7/gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma = np.loadtxt(address)



In [66]:

    
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t_5"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
    "phi_list.txt", "proteins_name_list.txt", gamma_file_name, 
    "lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data









    



0 (12.902492168806434+0j)






    Out[66]:







  
    
      
      Name
      Z
      E_Natives
      E_mgs
      E_mg_stds
    
  
  
    
      0
      1FC2C
      12.902492
      -72.939507
      -14.5936
      4.52206
    
    
      1
      1ENH
      10.936765
      -94.788777
      -19.966
      6.8414
    
    
      2
      2GB1
      12.988132
      -131.775597
      -27.7624
      8.00833
    
    
      3
      2CRO
      10.867731
      -130.273058
      -25.1145
      9.67622
    
    
      4
      1CTF
      10.445175
      -123.937394
      -28.4003
      9.14653
    
    
      5
      4ICB
      12.425854
      -167.259641
      -46.5205
      9.71677
    
    
      6
      1R69
      4.628182
      -114.374358
      -40.9413
      15.8665
    
    
      7
      1UTG
      4.358094
      -126.820263
      -59.8845
      15.3589
    
    
      8
      3ICB
      3.778402
      -161.489415
      -113.374
      12.7343
    
    
      9
      256BA
      2.836460
      -159.889357
      -78.9694
      28.5285
    
    
      10
      4CPV
      3.097858
      -156.959441
      -106.82
      16.1851
    
    
      11
      1CCR
      1.543357
      -237.494016
      -181.156
      36.5032
    
    
      12
      2MHR
      3.376231
      -226.974582
      -135.286
      27.1569
    
    
      13
      1MBA
      1.707582
      -226.946629
      -189.144
      22.1382
    
    
      14
      2FHA
      3.059179
      -307.398832
      -218.884
      28.9341



In [67]:

    
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t_7"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
    "phi_list.txt", "proteins_name_list.txt", gamma_file_name, 
    "lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data









    



0 (15.394139633453651+0j)






    Out[67]:







  
    
      
      Name
      Z
      E_Natives
      E_mgs
      E_mg_stds
    
  
  
    
      0
      1FC2C
      15.394140
      -67.082927
      -12.7573
      3.52898
    
    
      1
      1ENH
      14.454072
      -82.326354
      -16.2223
      4.57339
    
    
      2
      2GB1
      17.995654
      -120.965843
      -22.645
      5.46359
    
    
      3
      2CRO
      14.485697
      -111.494256
      -19.7966
      6.33022
    
    
      4
      1CTF
      11.557301
      -111.826330
      -24.7091
      7.53785
    
    
      5
      4ICB
      13.055911
      -137.497174
      -39.0883
      7.53749
    
    
      6
      1R69
      6.178104
      -92.994433
      -30.7364
      10.0772
    
    
      7
      1UTG
      5.467374
      -97.024359
      -45.5508
      9.41469
    
    
      8
      3ICB
      5.426244
      -130.648535
      -82.8196
      8.81438
    
    
      9
      256BA
      4.438673
      -123.890840
      -54.9674
      15.5279
    
    
      10
      4CPV
      3.335522
      -127.115741
      -86.5347
      12.1663
    
    
      11
      1CCR
      1.310163
      -146.866948
      -118.83
      21.3993
    
    
      12
      2MHR
      3.643060
      -174.225559
      -102.152
      19.7838
    
    
      13
      1MBA
      2.275258
      -192.369138
      -152.473
      17.5348
    
    
      14
      2FHA
      3.693267
      -217.992171
      -157.959
      16.2549



In [65]:

    
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t_6"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
    "phi_list.txt", "proteins_name_list.txt", gamma_file_name, 
    "lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data









    



0 (15.557881500459525+0j)






    Out[65]:







  
    
      
      Name
      Z
      E_Natives
      E_mgs
      E_mg_stds
    
  
  
    
      0
      1FC2C
      15.557882
      -66.409632
      -12.584
      3.4597
    
    
      1
      1ENH
      14.481983
      -82.012968
      -16.1626
      4.54706
    
    
      2
      2GB1
      18.132325
      -120.221625
      -22.5164
      5.38845
    
    
      3
      2CRO
      14.629681
      -110.247560
      -19.5473
      6.19974
    
    
      4
      1CTF
      11.518154
      -111.089092
      -24.6283
      7.50648
    
    
      5
      4ICB
      13.167126
      -136.222312
      -38.5971
      7.41431
    
    
      6
      1R69
      6.047398
      -92.728155
      -30.8776
      10.2276
    
    
      7
      1UTG
      5.227171
      -95.725888
      -45.4891
      9.61071
    
    
      8
      3ICB
      5.297855
      -129.444069
      -82.6104
      8.84012
    
    
      9
      256BA
      4.179782
      -122.787016
      -55.5428
      16.088
    
    
      10
      4CPV
      3.476587
      -128.232387
      -86.4684
      12.0129
    
    
      11
      1CCR
      1.257704
      -147.560831
      -120.253
      21.7125
    
    
      12
      2MHR
      3.628087
      -175.459441
      -103.038
      19.9612
    
    
      13
      1MBA
      2.233051
      -191.310877
      -152.205
      17.5125
    
    
      14
      2FHA
      3.719959
      -219.850967
      -158.76
      16.4226



In [68]:

    
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t_7_2"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
    "phi_list.txt", "proteins_name_list.txt", gamma_file_name, 
    "lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data









    



0 (14.95306946470842+0j)






    Out[68]:







  
    
      
      Name
      Z
      E_Natives
      E_mgs
      E_mg_stds
    
  
  
    
      0
      1FC2C
      14.953069
      -59.898056
      -11.3384
      3.24747
    
    
      1
      1ENH
      13.864714
      -70.647750
      -13.9516
      4.08924
    
    
      2
      2GB1
      17.531352
      -106.238755
      -19.8175
      4.92953
    
    
      3
      2CRO
      14.272086
      -96.801886
      -17.1511
      5.58088
    
    
      4
      1CTF
      11.259620
      -92.898240
      -20.598
      6.4212
    
    
      5
      4ICB
      12.790158
      -124.714568
      -35.3586
      6.98631
    
    
      6
      1R69
      6.966446
      -85.374952
      -26.9794
      8.38241
    
    
      7
      1UTG
      6.731735
      -94.339031
      -41.1663
      7.89882
    
    
      8
      3ICB
      7.425543
      -119.708526
      -68.3466
      6.91692
    
    
      9
      256BA
      5.390923
      -118.059756
      -48.3401
      12.9328
    
    
      10
      4CPV
      5.108738
      -123.618639
      -73.8877
      9.73448
    
    
      11
      1CCR
      2.465812
      -145.468158
      -102.744
      17.3265
    
    
      12
      2MHR
      5.887625
      -185.002005
      -92.0686
      15.7845
    
    
      13
      1MBA
      3.995153
      -184.497248
      -129.042
      13.8807
    
    
      14
      2FHA
      6.311264
      -224.000715
      -139.049
      13.4603



In [62]:

    
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
    "phi_list.txt", "proteins_name_list.txt", gamma_file_name, 
    "lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data









    



0 (15.881770605078113+0j)



In [64]:









    Out[64]:







  
    
      
      Name
      Z
      E_Natives
      E_mgs
      E_mg_stds
    
  
  
    
      0
      1FC2C
      15.881771
      -69.026462
      -13.1503
      3.51826
    
    
      1
      1ENH
      15.042489
      -88.963180
      -17.4922
      4.75127
    
    
      2
      2GB1
      18.434727
      -127.254795
      -23.9252
      5.60516
    
    
      3
      2CRO
      14.679367
      -118.191521
      -21.0059
      6.62056
    
    
      4
      1CTF
      11.772371
      -124.534577
      -27.5061
      8.24205
    
    
      5
      4ICB
      13.373861
      -139.045679
      -39.5249
      7.44144
    
    
      6
      1R69
      5.286558
      -93.297144
      -32.6843
      11.4655
    
    
      7
      1UTG
      4.003590
      -89.440327
      -46.6201
      10.6955
    
    
      8
      3ICB
      3.539106
      -130.550986
      -93.1623
      10.5644
    
    
      9
      256BA
      3.295272
      -117.900011
      -58.654
      17.9791
    
    
      10
      4CPV
      1.819500
      -119.969886
      -94.2593
      14.1306
    
    
      11
      1CCR
      0.210996
      -134.562463
      -129.278
      25.0473
    
    
      12
      2MHR
      1.620543
      -143.789168
      -105.903
      23.3788
    
    
      13
      1MBA
      0.711789
      -182.177810
      -167.432
      20.7172
    
    
      14
      2FHA
      1.192157
      -189.992927
      -167.524
      18.8475



In [63]:

    
# normalized with std.



In [61]:

    
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data









    Out[61]:







  
    
      
      Name
      Z
      E_Natives
      E_mgs
      E_mg_stds
    
  
  
    
      0
      1FC2C
      10.888645
      -27.679025
      -5.20934
      2.06359
    
    
      1
      1ENH
      9.251952
      -24.048072
      -4.82697
      2.07752
    
    
      2
      2GB1
      12.410122
      -43.414874
      -7.88703
      2.86281
    
    
      3
      2CRO
      10.234838
      -37.913786
      -6.68482
      3.05124
    
    
      4
      1CTF
      7.645952
      -24.047627
      -5.33915
      2.44685
    
    
      5
      4ICB
      10.266856
      -62.414439
      -17.7518
      4.35017
    
    
      6
      1R69
      14.526236
      -43.950599
      -10.2149
      2.3224
    
    
      7
      1UTG
      18.617068
      -64.288211
      -19.3788
      2.41227
    
    
      8
      3ICB
      17.566746
      -62.966443
      -15.6378
      2.69422
    
    
      9
      256BA
      17.457549
      -73.472311
      -17.7937
      3.18937
    
    
      10
      4CPV
      14.822274
      -77.716004
      -23.531
      3.65565
    
    
      11
      1CCR
      13.619389
      -99.237549
      -32.6869
      4.88646
    
    
      12
      2MHR
      24.058968
      -154.683226
      -40.303
      4.75416
    
    
      13
      1MBA
      16.661417
      -116.157490
      -38.3161
      4.67195
    
    
      14
      2FHA
      17.594301
      -170.013749
      -53.5348
      6.62026



In [ ]:

    
# with more decoys.



In [41]:

    
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data









    Out[41]:







  
    
      
      Name
      Z
      E_Natives
      E_mgs
      E_mg_stds
    
  
  
    
      0
      1FC2C
      10.888645
      -107.187656
      -20.1733
      7.99129
    
    
      1
      1ENH
      9.251952
      -93.126710
      -18.6925
      8.04524
    
    
      2
      2GB1
      12.410122
      -168.125094
      -30.5427
      11.0863
    
    
      3
      2CRO
      10.234838
      -146.822004
      -25.8871
      11.816
    
    
      4
      1CTF
      7.645952
      -93.124987
      -20.676
      9.47547
    
    
      5
      4ICB
      10.266856
      -241.701341
      -68.7444
      16.8461
    
    
      6
      1R69
      14.526236
      -170.199699
      -39.5575
      8.99353
    
    
      7
      1UTG
      18.617068
      -248.957567
      -75.0448
      9.34158
    
    
      8
      3ICB
      17.566746
      -243.838989
      -60.5577
      10.4334
    
    
      9
      256BA
      17.457549
      -284.523203
      -68.9066
      12.3509
    
    
      10
      4CPV
      14.822274
      -300.957002
      -91.1242
      14.1566
    
    
      11
      1CCR
      13.619389
      -384.299676
      -126.581
      18.9229
    
    
      12
      2MHR
      24.058968
      -599.014329
      -156.074
      18.4106
    
    
      13
      1MBA
      16.661417
      -449.822535
      -148.38
      18.0922
    
    
      14
      2FHA
      17.594301
      -658.382129
      -207.315
      25.6371



In [20]:

    
nameList = dataset["new"][0] + dataset["old"][0]



In [37]:

    
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)



In [38]:

    
data









    Out[38]:







  
    
      
      Name
      Z
      E_Natives
      E_mgs
      E_mg_stds
    
  
  
    
      0
      1FC2C
      23.480789
      -107.187656
      -15.5339
      3.90335
    
    
      1
      1ENH
      26.479082
      -93.126710
      -13.9019
      2.99198
    
    
      2
      2GB1
      22.975176
      -168.125094
      -28.8205
      6.06327
    
    
      3
      2CRO
      31.285547
      -146.822004
      -18.9095
      4.08855
    
    
      4
      1CTF
      11.639435
      -93.124987
      -18.7614
      6.38894
    
    
      5
      4ICB
      18.548103
      -241.701341
      -63.6676
      9.59849
    
    
      6
      1R69
      14.591790
      -170.199699
      -43.0636
      8.71285
    
    
      7
      1UTG
      20.877930
      -248.957567
      -78.7209
      8.15391
    
    
      8
      3ICB
      19.381229
      -243.838989
      -58.0876
      9.58409
    
    
      9
      256BA
      17.424797
      -284.523203
      -72.1624
      12.1873
    
    
      10
      4CPV
      14.375748
      -300.957002
      -86.6941
      14.9045
    
    
      11
      1CCR
      13.228477
      -384.299676
      -127.827
      19.3879
    
    
      12
      2MHR
      22.098569
      -599.014329
      -156.493
      20.0249
    
    
      13
      1MBA
      16.496307
      -449.822535
      -144.696
      18.4967
    
    
      14
      2FHA
      16.351727
      -658.382129
      -209.959
      27.4236

iter7 gamma



In [28]:

    
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)



In [35]:

    
data









    Out[35]:







  
    
      
      Name
      Z
      E_Natives
      E_mgs
      E_mg_stds
    
  
  
    
      0
      1FC2C
      20.736046
      -54.027982
      -8.9613
      2.17335
    
    
      1
      1ENH
      25.163685
      -75.813521
      -12.4091
      2.51968
    
    
      2
      2GB1
      20.575525
      -99.521555
      -19.1208
      3.90759
    
    
      3
      2CRO
      27.249896
      -105.345429
      -14.488
      3.33423
    
    
      4
      1CTF
      13.052314
      -98.729705
      -19.4564
      6.0735
    
    
      5
      4ICB
      20.471473
      -134.771453
      -34.2875
      4.90849
    
    
      6
      1R69
      2.991517
      -95.344303
      -52.5771
      14.2962
    
    
      7
      1UTG
      3.541324
      -108.984502
      -68.9202
      11.3134
    
    
      8
      3ICB
      1.407566
      -130.398896
      -118.733
      8.28805
    
    
      9
      256BA
      1.246208
      -137.257527
      -107.214
      24.1078
    
    
      10
      4CPV
      0.786581
      -126.445572
      -109.126
      22.0186
    
    
      11
      1CCR
      0.699880
      -243.915832
      -215.168
      41.0761
    
    
      12
      2MHR
      1.438503
      -181.273917
      -139.694
      28.905
    
    
      13
      1MBA
      -0.685222
      -179.977666
      -193.978
      20.432
    
    
      14
      2FHA
      1.296879
      -277.381832
      -247.049
      23.3892



In [138]:

    
pd.concat([data[:4],data[:4]]).reset_index(drop=False)









    Out[138]:







  
    
      
      index
      Name
      Length
      Seq
    
  
  
    
      0
      0
      1A2J
      188
      AQYEDGKQYTTLEKPVAGAPQVLEFFSFFCPHCYQFEEVLHISDNV...
    
    
      1
      1
      1A3H
      300
      SVVEEHGQLSISNGELVNERGEQVQLKGMSSHGLQWYGQFVNYESM...
    
    
      2
      2
      1A5Y
      284
      EMEKEFEQIDKSGSWAAIYQDIRHEASDFPCRVAKLPKNKNRNRYR...
    
    
      3
      3
      1A8Q
      274
      PICTTRDGVEIFYKDWGQGRPVVFIHGWPLNGDAWQDQLKAVVDAG...
    
    
      4
      0
      1A2J
      188
      AQYEDGKQYTTLEKPVAGAPQVLEFFSFFCPHCYQFEEVLHISDNV...
    
    
      5
      1
      1A3H
      300
      SVVEEHGQLSISNGELVNERGEQVQLKGMSSHGLQWYGQFVNYESM...
    
    
      6
      2
      1A5Y
      284
      EMEKEFEQIDKSGSWAAIYQDIRHEASDFPCRVAKLPKNKNRNRYR...
    
    
      7
      3
      1A8Q
      274
      PICTTRDGVEIFYKDWGQGRPVVFIHGWPLNGDAWQDQLKAVVDAG...



In [172]:

    
Q = pd.read_csv(f"/Users/weilu/Research/server/april_2019/database/iter4_30_3icb_11/wham.dat")[" Qw"]



In [151]:

    
len(Q)









    Out[151]:





1000



In [162]:

    
a = pd.concat([Q, Q]).reset_index()
a["Rank"] = a["index"].rank(ascending=False)
a.query(f"Rank < {200*30}")



In [169]:

    
pd.read_csv("/Users/weilu/Research/server/april_2019/database/Q_bias_2_1ctf", index_col=0).head()



In [177]:

    
a = pd.read_csv("/Users/weilu/Research/server/april_2019/database/Q_iter4_13_2fha", index_col=0).query(f"Rank < {200*30}")



In [200]:

    
simulation_location = "test"
name = "a"
sampled = a.sample(10).assign(Name=f"{simulation_location}_{name}_")
sampled["Location"] = sampled["Name"] + sampled["Run"].astype(str) + \
            "/frame" + sampled["index"].astype(str) + " " + sampled[" Qw"].round(3).astype(str)



In [ ]:

    
with open(f"decoys/lammps/{name}_{simulation_location}.decoys", "w") as out:
    for i, item in sampled.iterrows():
        f.write(f"{simulation_location}_{name}_{item['Run']}/frame{item['index']} {np.round(item[' Qw'], 3)}\n")



In [221]:

    
np.round(0.4124, 3)









    Out[221]:





0.412



In [222]:

    
for i, item in sampled.iterrows():
    print(f"{simulation_location}_{name}_{item['Run']}/frame{item['index']} {np.round(item[' Qw'], 3)}\n")









    



test_a_0/frame376 0.234

test_a_10/frame316 0.357

test_a_5/frame436 0.36

test_a_11/frame421 0.363

test_a_9/frame330 0.353

test_a_7/frame282 0.412

test_a_19/frame366 0.292

test_a_19/frame310 0.269

test_a_15/frame271 0.291

test_a_10/frame326 0.367



In [206]:

    
a = sampled["Location"].values



In [205]:

    
a.values









    Out[205]:





array(['test_a_0/frame376 0.234', 'test_a_10/frame316 0.357',
       'test_a_5/frame436 0.36', 'test_a_11/frame421 0.363',
       'test_a_9/frame330 0.353', 'test_a_7/frame282 0.412',
       'test_a_19/frame366 0.292', 'test_a_19/frame310 0.269',
       'test_a_15/frame271 0.291', 'test_a_10/frame326 0.367'],
      dtype=object)



In [150]:

    
pd.concat([Q, Q]).reset_index(drop=False)









    Out[150]:







  
    
      
      index
      Qw
    
  
  
    
      0
      0
      0.060984
    
    
      1
      1
      0.137520
    
    
      2
      2
      0.222031
    
    
      3
      3
      0.197840
    
    
      4
      4
      0.273562
    
    
      5
      5
      0.266006
    
    
      6
      6
      0.286569
    
    
      7
      7
      0.218554
    
    
      8
      8
      0.260241
    
    
      9
      9
      0.332594
    
    
      10
      10
      0.398317
    
    
      11
      11
      0.350271
    
    
      12
      12
      0.416690
    
    
      13
      13
      0.361991
    
    
      14
      14
      0.379512
    
    
      15
      15
      0.366766
    
    
      16
      16
      0.351632
    
    
      17
      17
      0.339836
    
    
      18
      18
      0.291605
    
    
      19
      19
      0.262566
    
    
      20
      20
      0.265882
    
    
      21
      21
      0.266080
    
    
      22
      22
      0.264690
    
    
      23
      23
      0.251368
    
    
      24
      24
      0.259581
    
    
      25
      25
      0.271665
    
    
      26
      26
      0.428057
    
    
      27
      27
      0.433507
    
    
      28
      28
      0.410504
    
    
      29
      29
      0.325923
    
    
      ...
      ...
      ...
    
    
      1970
      970
      0.767218
    
    
      1971
      971
      0.779492
    
    
      1972
      972
      0.771003
    
    
      1973
      973
      0.771163
    
    
      1974
      974
      0.735662
    
    
      1975
      975
      0.728089
    
    
      1976
      976
      0.773779
    
    
      1977
      977
      0.804048
    
    
      1978
      978
      0.762598
    
    
      1979
      979
      0.805954
    
    
      1980
      980
      0.799129
    
    
      1981
      981
      0.873564
    
    
      1982
      982
      0.857443
    
    
      1983
      983
      0.831532
    
    
      1984
      984
      0.733035
    
    
      1985
      985
      0.728545
    
    
      1986
      986
      0.787008
    
    
      1987
      987
      0.750969
    
    
      1988
      988
      0.878250
    
    
      1989
      989
      0.811806
    
    
      1990
      990
      0.829812
    
    
      1991
      991
      0.836699
    
    
      1992
      992
      0.834538
    
    
      1993
      993
      0.790569
    
    
      1994
      994
      0.825448
    
    
      1995
      995
      0.779203
    
    
      1996
      996
      0.750317
    
    
      1997
      997
      0.795165
    
    
      1998
      998
      0.741362
    
    
      1999
      999
      0.723373
    
  

2000 rows × 2 columns



In [ ]:

    
data.to_csv()



In [230]:

    
HFscales = pd.read_csv("~/opt/small_script/Whole_residue_HFscales.txt", sep="\t")



In [231]:

    
HFscales



In [ ]:



In [236]:



In [242]:

    
direct = membrane_gamma[:210][:,0]
protein = membrane_gamma[210:][:,0]
water = membrane_gamma[210:][:,1]



In [246]:



In [247]:

    
def simulation_to_iteration_gamma(membrane_gamma):
    direct = membrane_gamma[:210][:,0]
    protein = membrane_gamma[210:][:,0]
    water = membrane_gamma[210:][:,1]
    return np.concatenate([direct, protein, water])



In [260]:

    
membrane_gamma = np.loadtxt("/Users/weilu/openmmawsem/parameters/membrane_gamma_rescaled.dat")
membrane_gamma_formated = simulation_to_iteration_gamma(membrane_gamma)
np.std(membrane_gamma_formated)









    Out[260]:





0.40883106885391807



In [263]:

    
np.mean(membrane_gamma_formated)









    Out[263]:





-0.01062296825396825



In [264]:

    
np.mean(water_gamma_formated)









    Out[264]:





0.06386869841269842



In [251]:

    
membrane_gamma = np.loadtxt("/Users/weilu/opt/parameters/membrane/gamma.dat")
membrane_gamma_formated = simulation_to_iteration_gamma(membrane_gamma)
water_gamma = np.loadtxt("/Users/weilu/opt/parameters/globular_parameters/gamma.dat")
water_gamma_formated = simulation_to_iteration_gamma(water_gamma)
ratio = np.std(water_gamma_formated)/np.std(membrane_gamma_formated)
membrane_gamma_formated_rescaled = membrane_gamma_formated * ratio



In [252]:

    
np.std(membrane_gamma_formated)









    Out[252]:





0.0035361816815055996



In [253]:

    
np.std(water_gamma_formated)









    Out[253]:





0.40883096718693446



In [255]:

    
ratio = np.std(water_gamma_formated)/np.std(membrane_gamma_formated)



In [257]:

    
np.std(membrane_gamma_formated * ratio)









    Out[257]:





0.4088309671869345



In [258]:

    
membrane_gamma_formated_rescaled = membrane_gamma_formated * ratio



In [259]:

    
gamma = membrane_gamma_formated_rescaled
with open("/Users/weilu/openmmawsem/parameters/membrane_gamma_rescaled.dat", "w") as out:
    c = 0
    for i in range(20):
        for j in range(i, 20):
            out.write(f"{gamma[c]:<.5f} {gamma[c]:10.5f}\n")
            c += 1
    out.write("\n")
    for i in range(20):
        for j in range(i, 20):
            # protein, water
            out.write(f"{gamma[c]:<.5f} {gamma[c+210]:10.5f}\n")
            c += 1



In [ ]:



In [272]:

    
from simtk.openmm.app import *
from simtk.openmm import *
def read_trajectory_pdb_positions(pdb_trajectory_filename):
    import uuid, os
    pdb_trajectory_contents = open(pdb_trajectory_filename).read().split("MODEL")[1:]
    pdb_trajectory_contents = ['\n'.join(x.split('\n')[1:]) for x in pdb_trajectory_contents]
    pdb_trajectory = []
    for i, pdb_contents in enumerate(pdb_trajectory_contents):
        temporary_file_name = str(uuid.uuid4())
        temporary_pdb_file = open(temporary_file_name, 'w')
        temporary_pdb_file.write(pdb_contents)
        temporary_pdb_file.close()
        pdb = PDBFile(temporary_file_name)
        pdb_trajectory.append(pdb)
        os.remove(temporary_file_name)
    return pdb_trajectory



In [267]:

    
import mdtraj as md
location = "/Users/weilu/Research/server/april_2019/complete_2xov/teaser_simulation_annealing/test.pdb"
pdb_trajectory = md.load(location)



In [305]:

    
location = "/Users/weilu/Research/server/april_2019/complete_2xov/teaser_simulation_annealing/test.pdb"
# location = "/Users/weilu/Research/server/april_2019/complete_2xov/teaser_simulation_annealing/origianl.pdb"
location = "/Users/weilu/Research/server/april_2019/complete_2xov/smaller.pdb"
a = md.iterload(location, chunk=20)



In [323]:

    
%%time
location = "/Users/weilu/Research/server/april_2019/complete_2xov/movie.dcd"
# a = md.iterload(location, chunk=20)
# a = md.load(location, top="/Users/weilu/Research/server/april_2019/complete_2xov/complete_2xov-openmmawsem.pdb")
a = md.load(location, top="/Users/weilu/Research/server/april_2019/complete_2xov/native.pdb")
# for traj in a:
#     print(traj.openmm_positions)









    



CPU times: user 102 ms, sys: 15.1 ms, total: 117 ms
Wall time: 166 ms



In [324]:

    
%%time
location = "/Users/weilu/Research/server/april_2019/complete_2xov/movie.dcd"
# a = md.iterload(location, chunk=20)
a = md.load(location, top="/Users/weilu/Research/server/april_2019/complete_2xov/complete_2xov-openmmawsem.pdb")
a = md.load(location, top="/Users/weilu/Research/server/april_2019/complete_2xov/complete_2xov-openmmawsem.pdb")
# for traj in a:
#     print(traj.openmm_positions)









    



CPU times: user 202 ms, sys: 2.84 ms, total: 204 ms
Wall time: 131 ms



In [325]:

    
%%time
a = md.iterload(location, chunk=20)
# a = md.load(location)
# for traj in a:
#     print(traj.openmm_positions)









    



CPU times: user 14 µs, sys: 4 µs, total: 18 µs
Wall time: 20 µs



In [ ]:



In [326]:

    
location = "/Users/weilu/Research/server/april_2019/complete_2xov/movie.dcd"
f = md.open(location)
f.seek(0)



In [313]:

    
f.positions.shape









    Out[313]:





(200, 1620, 3)



In [296]:

    
for frame,traj in enumerate(a):
    print(md.rmsd(traj, traj))



In [290]:

    
START = 0
STOP = 3
STRIDE = 1
with md.open(location) as f:
    f.seek(START)
    print(f)
    t = f.read_as_traj(
        topology, n_frames=STOP-START, stride=STRIDE
    )
    print(t)









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-290-bee2b0d4ff15> in <module>
      3 STRIDE = 1
      4 with md.open(location) as f:
----> 5     f.seek(START)
      6     print(f)
      7     t = f.read_as_traj(

AttributeError: 'PDBTrajectoryFile' object has no attribute 'seek'



In [280]:

    
a = PDBFile(location)



In [273]:

    
a = read_trajectory_pdb_positions(location)



In [287]:

    
len(pdb_trajectory)









    Out[287]:





6



In [288]:

    
# pdb_trajectory.openmm_positions(0)

	index	Qw	Rank
0	0	0.261345	59985.5
1	1	0.291708	59955.5
2	2	0.282458	59925.5
3	3	0.218886	59895.5
4	4	0.262429	59865.5

	AA	DGwif	DGwoct	Oct-IF
0	Ala	0.17	0.50	0.33
1	Arg+	0.81	1.81	1.00
2	Asn	0.42	0.85	0.43
3	Asp-	1.23	3.64	2.41
4	Asp0	-0.07	0.43	0.50
5	Cys	-0.24	-0.02	0.22
6	Gln	0.58	0.77	0.19
7	Glu-	2.02	3.63	1.61
8	Glu0	-0.01	0.11	0.12
9	Gly	0.01	1.15	1.14
10	His+	0.96	2.33	1.37
11	His0	0.17	0.11	-0.06
12	Ile	-0.31	-1.12	-0.81
13	Leu	-0.56	-1.25	-0.69
14	Lys+	0.99	2.80	1.81
15	Met	-0.23	-0.67	-0.44
16	Phe	-1.13	-1.71	-0.58
17	Pro	0.45	0.14	-0.31
18	Ser	0.13	0.46	0.33
19	Thr	0.14	0.25	0.11
20	Trp	-1.85	-2.09	-0.24
21	Tyr	-0.94	-0.71	0.23
22	Val	0.07	-0.46	-0.53

	Name	Z	E_Natives	E_mgs	E_mg_stds
0	1FC2C	12.902492	-72.939507	-14.5936	4.52206
1	1ENH	10.936765	-94.788777	-19.966	6.8414
2	2GB1	12.988132	-131.775597	-27.7624	8.00833
3	2CRO	10.867731	-130.273058	-25.1145	9.67622
4	1CTF	10.445175	-123.937394	-28.4003	9.14653
5	4ICB	12.425854	-167.259641	-46.5205	9.71677
6	1R69	4.628182	-114.374358	-40.9413	15.8665
7	1UTG	4.358094	-126.820263	-59.8845	15.3589
8	3ICB	3.778402	-161.489415	-113.374	12.7343
9	256BA	2.836460	-159.889357	-78.9694	28.5285
10	4CPV	3.097858	-156.959441	-106.82	16.1851
11	1CCR	1.543357	-237.494016	-181.156	36.5032
12	2MHR	3.376231	-226.974582	-135.286	27.1569
13	1MBA	1.707582	-226.946629	-189.144	22.1382
14	2FHA	3.059179	-307.398832	-218.884	28.9341

	Name	Z	E_Natives	E_mgs	E_mg_stds
0	1FC2C	15.394140	-67.082927	-12.7573	3.52898
1	1ENH	14.454072	-82.326354	-16.2223	4.57339
2	2GB1	17.995654	-120.965843	-22.645	5.46359
3	2CRO	14.485697	-111.494256	-19.7966	6.33022
4	1CTF	11.557301	-111.826330	-24.7091	7.53785
5	4ICB	13.055911	-137.497174	-39.0883	7.53749
6	1R69	6.178104	-92.994433	-30.7364	10.0772
7	1UTG	5.467374	-97.024359	-45.5508	9.41469
8	3ICB	5.426244	-130.648535	-82.8196	8.81438
9	256BA	4.438673	-123.890840	-54.9674	15.5279
10	4CPV	3.335522	-127.115741	-86.5347	12.1663
11	1CCR	1.310163	-146.866948	-118.83	21.3993
12	2MHR	3.643060	-174.225559	-102.152	19.7838
13	1MBA	2.275258	-192.369138	-152.473	17.5348
14	2FHA	3.693267	-217.992171	-157.959	16.2549

	Name	Z	E_Natives	E_mgs	E_mg_stds
0	1FC2C	15.557882	-66.409632	-12.584	3.4597
1	1ENH	14.481983	-82.012968	-16.1626	4.54706
2	2GB1	18.132325	-120.221625	-22.5164	5.38845
3	2CRO	14.629681	-110.247560	-19.5473	6.19974
4	1CTF	11.518154	-111.089092	-24.6283	7.50648
5	4ICB	13.167126	-136.222312	-38.5971	7.41431
6	1R69	6.047398	-92.728155	-30.8776	10.2276
7	1UTG	5.227171	-95.725888	-45.4891	9.61071
8	3ICB	5.297855	-129.444069	-82.6104	8.84012
9	256BA	4.179782	-122.787016	-55.5428	16.088
10	4CPV	3.476587	-128.232387	-86.4684	12.0129
11	1CCR	1.257704	-147.560831	-120.253	21.7125
12	2MHR	3.628087	-175.459441	-103.038	19.9612
13	1MBA	2.233051	-191.310877	-152.205	17.5125
14	2FHA	3.719959	-219.850967	-158.76	16.4226

	Name	Z	E_Natives	E_mgs	E_mg_stds
0	1FC2C	14.953069	-59.898056	-11.3384	3.24747
1	1ENH	13.864714	-70.647750	-13.9516	4.08924
2	2GB1	17.531352	-106.238755	-19.8175	4.92953
3	2CRO	14.272086	-96.801886	-17.1511	5.58088
4	1CTF	11.259620	-92.898240	-20.598	6.4212
5	4ICB	12.790158	-124.714568	-35.3586	6.98631
6	1R69	6.966446	-85.374952	-26.9794	8.38241
7	1UTG	6.731735	-94.339031	-41.1663	7.89882
8	3ICB	7.425543	-119.708526	-68.3466	6.91692
9	256BA	5.390923	-118.059756	-48.3401	12.9328
10	4CPV	5.108738	-123.618639	-73.8877	9.73448
11	1CCR	2.465812	-145.468158	-102.744	17.3265
12	2MHR	5.887625	-185.002005	-92.0686	15.7845
13	1MBA	3.995153	-184.497248	-129.042	13.8807
14	2FHA	6.311264	-224.000715	-139.049	13.4603

	Name	Z	E_Natives	E_mgs	E_mg_stds
0	1FC2C	15.881771	-69.026462	-13.1503	3.51826
1	1ENH	15.042489	-88.963180	-17.4922	4.75127
2	2GB1	18.434727	-127.254795	-23.9252	5.60516
3	2CRO	14.679367	-118.191521	-21.0059	6.62056
4	1CTF	11.772371	-124.534577	-27.5061	8.24205
5	4ICB	13.373861	-139.045679	-39.5249	7.44144
6	1R69	5.286558	-93.297144	-32.6843	11.4655
7	1UTG	4.003590	-89.440327	-46.6201	10.6955
8	3ICB	3.539106	-130.550986	-93.1623	10.5644
9	256BA	3.295272	-117.900011	-58.654	17.9791
10	4CPV	1.819500	-119.969886	-94.2593	14.1306
11	1CCR	0.210996	-134.562463	-129.278	25.0473
12	2MHR	1.620543	-143.789168	-105.903	23.3788
13	1MBA	0.711789	-182.177810	-167.432	20.7172
14	2FHA	1.192157	-189.992927	-167.524	18.8475

	Name	Z	E_Natives	E_mgs	E_mg_stds
0	1FC2C	10.888645	-27.679025	-5.20934	2.06359
1	1ENH	9.251952	-24.048072	-4.82697	2.07752
2	2GB1	12.410122	-43.414874	-7.88703	2.86281
3	2CRO	10.234838	-37.913786	-6.68482	3.05124
4	1CTF	7.645952	-24.047627	-5.33915	2.44685
5	4ICB	10.266856	-62.414439	-17.7518	4.35017
6	1R69	14.526236	-43.950599	-10.2149	2.3224
7	1UTG	18.617068	-64.288211	-19.3788	2.41227
8	3ICB	17.566746	-62.966443	-15.6378	2.69422
9	256BA	17.457549	-73.472311	-17.7937	3.18937
10	4CPV	14.822274	-77.716004	-23.531	3.65565
11	1CCR	13.619389	-99.237549	-32.6869	4.88646
12	2MHR	24.058968	-154.683226	-40.303	4.75416
13	1MBA	16.661417	-116.157490	-38.3161	4.67195
14	2FHA	17.594301	-170.013749	-53.5348	6.62026

	Name	Z	E_Natives	E_mgs	E_mg_stds
0	1FC2C	10.888645	-107.187656	-20.1733	7.99129
1	1ENH	9.251952	-93.126710	-18.6925	8.04524
2	2GB1	12.410122	-168.125094	-30.5427	11.0863
3	2CRO	10.234838	-146.822004	-25.8871	11.816
4	1CTF	7.645952	-93.124987	-20.676	9.47547
5	4ICB	10.266856	-241.701341	-68.7444	16.8461
6	1R69	14.526236	-170.199699	-39.5575	8.99353
7	1UTG	18.617068	-248.957567	-75.0448	9.34158
8	3ICB	17.566746	-243.838989	-60.5577	10.4334
9	256BA	17.457549	-284.523203	-68.9066	12.3509
10	4CPV	14.822274	-300.957002	-91.1242	14.1566
11	1CCR	13.619389	-384.299676	-126.581	18.9229
12	2MHR	24.058968	-599.014329	-156.074	18.4106
13	1MBA	16.661417	-449.822535	-148.38	18.0922
14	2FHA	17.594301	-658.382129	-207.315	25.6371

	Name	Z	E_Natives	E_mgs	E_mg_stds
0	1FC2C	23.480789	-107.187656	-15.5339	3.90335
1	1ENH	26.479082	-93.126710	-13.9019	2.99198
2	2GB1	22.975176	-168.125094	-28.8205	6.06327
3	2CRO	31.285547	-146.822004	-18.9095	4.08855
4	1CTF	11.639435	-93.124987	-18.7614	6.38894
5	4ICB	18.548103	-241.701341	-63.6676	9.59849
6	1R69	14.591790	-170.199699	-43.0636	8.71285
7	1UTG	20.877930	-248.957567	-78.7209	8.15391
8	3ICB	19.381229	-243.838989	-58.0876	9.58409
9	256BA	17.424797	-284.523203	-72.1624	12.1873
10	4CPV	14.375748	-300.957002	-86.6941	14.9045
11	1CCR	13.228477	-384.299676	-127.827	19.3879
12	2MHR	22.098569	-599.014329	-156.493	20.0249
13	1MBA	16.496307	-449.822535	-144.696	18.4967
14	2FHA	16.351727	-658.382129	-209.959	27.4236

	Name	Z	E_Natives	E_mgs	E_mg_stds
0	1FC2C	20.736046	-54.027982	-8.9613	2.17335
1	1ENH	25.163685	-75.813521	-12.4091	2.51968
2	2GB1	20.575525	-99.521555	-19.1208	3.90759
3	2CRO	27.249896	-105.345429	-14.488	3.33423
4	1CTF	13.052314	-98.729705	-19.4564	6.0735
5	4ICB	20.471473	-134.771453	-34.2875	4.90849
6	1R69	2.991517	-95.344303	-52.5771	14.2962
7	1UTG	3.541324	-108.984502	-68.9202	11.3134
8	3ICB	1.407566	-130.398896	-118.733	8.28805
9	256BA	1.246208	-137.257527	-107.214	24.1078
10	4CPV	0.786581	-126.445572	-109.126	22.0186
11	1CCR	0.699880	-243.915832	-215.168	41.0761
12	2MHR	1.438503	-181.273917	-139.694	28.905
13	1MBA	-0.685222	-179.977666	-193.978	20.432
14	2FHA	1.296879	-277.381832	-247.049	23.3892

	index	Name	Length	Seq
0	0	1A2J	188	AQYEDGKQYTTLEKPVAGAPQVLEFFSFFCPHCYQFEEVLHISDNV...
1	1	1A3H	300	SVVEEHGQLSISNGELVNERGEQVQLKGMSSHGLQWYGQFVNYESM...
2	2	1A5Y	284	EMEKEFEQIDKSGSWAAIYQDIRHEASDFPCRVAKLPKNKNRNRYR...
3	3	1A8Q	274	PICTTRDGVEIFYKDWGQGRPVVFIHGWPLNGDAWQDQLKAVVDAG...
4	0	1A2J	188	AQYEDGKQYTTLEKPVAGAPQVLEFFSFFCPHCYQFEEVLHISDNV...
5	1	1A3H	300	SVVEEHGQLSISNGELVNERGEQVQLKGMSSHGLQWYGQFVNYESM...
6	2	1A5Y	284	EMEKEFEQIDKSGSWAAIYQDIRHEASDFPCRVAKLPKNKNRNRYR...
7	3	1A8Q	274	PICTTRDGVEIFYKDWGQGRPVVFIHGWPLNGDAWQDQLKAVVDAG...

	index	Qw
0	0	0.060984
1	1	0.137520
2	2	0.222031
3	3	0.197840
4	4	0.273562
5	5	0.266006
6	6	0.286569
7	7	0.218554
8	8	0.260241
9	9	0.332594
10	10	0.398317
11	11	0.350271
12	12	0.416690
13	13	0.361991
14	14	0.379512
15	15	0.366766
16	16	0.351632
17	17	0.339836
18	18	0.291605
19	19	0.262566
20	20	0.265882
21	21	0.266080
22	22	0.264690
23	23	0.251368
24	24	0.259581
25	25	0.271665
26	26	0.428057
27	27	0.433507
28	28	0.410504
29	29	0.325923
...	...	...
1970	970	0.767218
1971	971	0.779492
1972	972	0.771003
1973	973	0.771163
1974	974	0.735662
1975	975	0.728089
1976	976	0.773779
1977	977	0.804048
1978	978	0.762598
1979	979	0.805954
1980	980	0.799129
1981	981	0.873564
1982	982	0.857443
1983	983	0.831532
1984	984	0.733035
1985	985	0.728545
1986	986	0.787008
1987	987	0.750969
1988	988	0.878250
1989	989	0.811806
1990	990	0.829812
1991	991	0.836699
1992	992	0.834538
1993	993	0.790569
1994	994	0.825448
1995	995	0.779203
1996	996	0.750317
1997	997	0.795165
1998	998	0.741362
1999	999	0.723373