In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Notebook-wide plotting defaults (these calls change global seaborn/matplotlib
# state, so they affect every figure drawn after this cell).
# Default figure size is 10 x 6.18; width / height is about 1.618, i.e. roughly
# the golden ratio.
sns.set(rc={'figure.figsize':(10,6.180)})
# Select the "whitegrid" style. This is applied after sns.set() above, so it is
# the style in effect for later plots.
sns.set_style("whitegrid")

%matplotlib inline

In [4]:
def my_reorder(a, first):
    """Return the items of ``a`` reordered so that ``first`` leads.

    Parameters
    ----------
    a : iterable
        Items to reorder, e.g. the column labels of a DataFrame.
    first : iterable
        Items to place at the front, in the given order. Any iterable is
        accepted (list, tuple, pandas Index, ...) and it is never modified.
        Every item of ``first`` is emitted, even if it does not occur in ``a``.

    Returns
    -------
    list
        The items of ``first`` followed by each item of ``a`` that is not in
        ``first``, in their original relative order.
    """
    # Materialise ``first`` once as a private list: this avoids relying on a
    # list-specific ``.copy()`` / in-place ``.append()`` (a tuple has neither,
    # and a pandas Index has an ``append`` that does not mutate in place).
    leading = list(first)
    return leading + [col for col in a if col not in leading]

def read_pdb(
    name,
    base_dir="/Users/weilu/Research/server/nov_2018/iterative_optimization_4/all_simulations/",
    n_repeats=20,
    n_runs=2,
):
    """Load every available run of one protein into a single DataFrame.

    For each ``repeat`` in ``range(n_repeats)`` and ``run`` in
    ``range(n_runs)`` this reads ``wham.dat`` and ``energy.dat`` (as CSV) from
    ``<base_dir>/<name>/simulation/<repeat>/<run>/``, strips whitespace from
    the column names, drops a fixed set of columns from each table, joins the
    two tables side by side (rows are matched by position) and tags the
    result with ``Repeat`` and ``Run`` columns.

    A run whose ``wham.dat`` is missing, unreadable or empty is reported on
    stdout and skipped.

    Parameters
    ----------
    name : str
        Sub-directory name of the protein under ``base_dir`` (the caller in
        this notebook passes a lower-case 4-character PDB id).
    base_dir : str, optional
        Directory that contains one sub-directory per protein. Defaults to
        the location this notebook was originally written against.
    n_repeats : int, optional
        Number of repeat directories to scan (``0 .. n_repeats - 1``).
    n_runs : int, optional
        Number of run directories to scan inside each repeat.

    Returns
    -------
    pandas.DataFrame
        All loaded runs stacked with a fresh 0..N-1 index. The columns
        "Steps", "Qw", "VTotal", "Run", "Repeat" come first; any of these
        labels that is absent from the loaded data appears as an all-NaN
        column (this is how ``reindex`` treats unknown labels).

    Raises
    ------
    ValueError
        If no run could be loaded at all.
    OSError
        If ``wham.dat`` exists for a run but its ``energy.dat`` cannot be
        opened (only the ``wham.dat`` read is treated as optional).
    KeyError
        If a file lacks one of the columns that are expected to be dropped.
    """
    # Loop-invariant column lists, hoisted out of the nested loops.
    wham_drop = ['Tc', 'Energy']
    # 'Steps' is dropped here so the side-by-side concat does not end up with
    # two 'Steps' columns -- presumably wham.dat carries its own; TODO confirm.
    energy_drop = ['Steps', 'Shake', 'Excluded', 'DSSP', 'Helix', 'AMH-Go', 'Vec_FM', 'Membrane', 'SSB']

    all_data = []
    for i in range(n_repeats):
        for j in range(n_runs):
            location = os.path.join(base_dir, name, "simulation", str(i), str(j))
            try:
                wham = pd.read_csv(os.path.join(location, "wham.dat"))
            except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
                # Only "file missing / unreadable / empty / malformed" is
                # treated as a skippable run. Anything else (KeyboardInterrupt,
                # programming errors, ...) is allowed to propagate instead of
                # being silently swallowed.
                print(f"PDB: {name}, repeat: {i}, run: {j} not exist")
                continue
            wham.columns = wham.columns.str.strip()
            wham = wham.drop(wham_drop, axis=1)

            energy = pd.read_csv(os.path.join(location, "energy.dat"))
            energy.columns = energy.columns.str.strip()
            energy = energy.drop(energy_drop, axis=1)

            # axis=1 aligns on the row index, i.e. on row position for two
            # freshly read CSV files.
            run_data = pd.concat([wham, energy], axis=1).assign(Repeat=i, Run=j)
            all_data.append(run_data)

    if not all_data:
        # pd.concat([]) would raise an opaque "No objects to concatenate";
        # keep the exception type but say what was actually missing.
        raise ValueError(
            f"No simulation data found for PDB {name!r} under {base_dir!r}"
        )

    data = pd.concat(all_data).reset_index(drop=True)
    data = data.reindex(columns=my_reorder(data.columns, ["Steps", "Qw", "VTotal", "Run", "Repeat"]))
    print(name, len(data))
    return data

In [5]:
# Proteins to load. Entries may be longer than four characters (e.g. "256BA");
# only the lower-cased first four characters are passed to read_pdb.
pdb_list = ["1R69", "1UTG", "3ICB", "256BA", "4CPV", "1CCR", "2MHR", "1MBA", "2FHA"]

# One DataFrame per protein, in the order of pdb_list.
all_data = [read_pdb(pdb_id.lower()[:4]) for pdb_id in pdb_list]

# Stack everything into one table; each protein's rows keep their own index.
data = pd.concat(all_data)


1r69 40000
1utg 40000
3icb 40000
256b 40000
PDB: 4cpv, repeat: 0, run: 1 not exist
4cpv 38000
1ccr 40000
PDB: 2mhr, repeat: 7, run: 1 not exist
2mhr 37898
PDB: 1mba, repeat: 1, run: 1 not exist
PDB: 1mba, repeat: 11, run: 1 not exist
PDB: 1mba, repeat: 14, run: 1 not exist
PDB: 1mba, repeat: 18, run: 1 not exist
1mba 16000
PDB: 2fha, repeat: 6, run: 1 not exist
PDB: 2fha, repeat: 7, run: 1 not exist
PDB: 2fha, repeat: 8, run: 1 not exist
PDB: 2fha, repeat: 16, run: 1 not exist
PDB: 2fha, repeat: 19, run: 1 not exist
2fha 14939

In [ ]: