Does removing ChemicalEnvironments speed up sampling

This ipython notebook is being used to determine if removing the list of chemicalenvironments would significantly increase the speed of smirky


In [2]:
from openforcefield.typing.chemistry.environment import TorsionChemicalEnvironment
import time
import copy
import numpy as np
from numpy import random
from smarty.atomtyper import AtomTyper

In [3]:
def smirks_sampling(smirks, iterations):
    """
    This method takes in a list of smirks strings, then randomly picks one
    creates a chemical environment and writes back out the smirks string
    
    It only stores the smirks strings and only sometimes keeps the "new one" 
    the new one is just a copy of the randomly chosen current one
    """
    current = copy.deepcopy(smirks)
    for i in range(iterations):
        change = random.choice(current)
        
        env = TorsionChemicalEnvironment(smirks = change)
        new_smirks = env.asSMIRKS()
        
        # assume we accept a move 30% of the time and extend the list
        if random.rand() < 0.3: 
            current.append(new_smirks)

    return current

def environment_sampling(smirks, iterations):
    """
    This method taks in a list of smirks, turns them into chemical environments
    and then iterates where some percentage of the time you keep the new environment
    """
    current = [TorsionChemicalEnvironment(smirks = c) for c in smirks]
    
    for i in range(iterations):
        change = copy.deepcopy(random.choice(current))
        new_smirks = change.asSMIRKS
        
        # keep the new one 30% of the time
        if random.rand() < 0.3:
            current.append(change)
            
    return [e.asSMIRKS for e in current]

def run_samplings(smirks, iterations):
    """
    This method runs smirks_sampling and environment sampling and returns the time for each using 
    the same input list and number of iterations
    """
    
    # smirks first 
    init_time = time.time()
    smirks = smirks_sampling(smirks, iterations)
    end_time = time.time()
    smirks_time = (end_time - init_time) / 60.0
    
    # environments
    init_time = time.time()
    env_smirks = environment_sampling(smirks, iterations)
    end_time = time.time()
    env_time = (end_time - init_time) / 60.0
    
    return smirks_time, env_time

In [6]:
long = AtomTyper.read_typelist('Torsion_0_0.00e+00_results.smarts')
long = [smirks for (smirks,name) in long if not '$' in smirks]
smirks_lists = {
    'generic':['[*:1]~[*:2]~[*:3]~[*:4]'],
    'short':copy.deepcopy(long[:10]),
    'long':copy.deepcopy(long)}

iterations = [2, 10, 100, 1000, 10000, 30000]

for its in iterations:
    print('%s  %i Iterations  %s' % ('-'*30, its, '-'*30))
    for title, smirks in smirks_lists.items():
        smirks_time, env_time = run_samplings(smirks, its)
        dif = env_time - smirks_time
        print("%20s\t%.2e\t%.2e\t%.2e" % (title, smirks_time, env_time,dif))
    print('\n')


------------------------------  2 Iterations  ------------------------------
               short	1.97e-05	6.54e-05	4.57e-05
                long	1.93e-05	4.58e-04	4.39e-04
             generic	1.34e-05	1.82e-05	4.84e-06


------------------------------  10 Iterations  ------------------------------
               short	7.12e-05	1.16e-04	4.53e-05
                long	8.27e-05	5.40e-04	4.58e-04
             generic	6.60e-05	6.47e-05	-1.23e-06


------------------------------  100 Iterations  ------------------------------
               short	6.19e-04	7.01e-04	8.20e-05
                long	7.44e-04	1.36e-03	6.12e-04
             generic	5.49e-04	6.28e-04	7.92e-05


------------------------------  1000 Iterations  ------------------------------
               short	7.59e-03	1.73e-02	9.76e-03
                long	8.42e-03	2.10e-02	1.26e-02
             generic	6.89e-03	1.61e-02	9.20e-03


------------------------------  10000 Iterations  ------------------------------
               short	8.89e-02	1.09e+00	9.98e-01
                long	9.37e-02	1.17e+00	1.08e+00
             generic	7.18e-02	1.12e+00	1.05e+00


------------------------------  30000 Iterations  ------------------------------
               short	3.61e-01	1.04e+01	1.00e+01
                long	4.51e-01	1.08e+01	1.04e+01
             generic	3.13e-01	1.01e+01	9.76e+00



In [ ]: