Analyze Training Reactions Script

Modified to analyze the training reactions in a particular family.


In [ ]:
familyName = 'R_Addition_MultipleBond'

In [ ]:
import numpy as np
import matplotlib.pyplot as plt
import itertools
import os

from IPython.display import display, HTML, Image
from base64 import b64encode
from io import BytesIO

from rmgpy.data.rmg import RMGDatabase
from rmgpy.rmg.model import Species
from rmgpy import settings
from rmgpy.exceptions import UndeterminableKineticsError, KineticsError
from rmgpy.reaction import Reaction
from rmgpy.data.kinetics.family import TemplateReaction

In [ ]:
database = RMGDatabase()
database.load(
    path = settings['database.directory'], 
    thermoLibraries = ['primaryThermoLibrary'], # can add others if necessary
    kineticsFamilies = [familyName], 
    kineticsDepositories = ['training'],
    reactionLibraries = [],
)

# Fill the tree by averaging, but do not add training reactions
for family in database.kinetics.families.values():
    family.fillKineticsRulesByAveragingUp(verbose=True)

In [ ]:
family = database.kinetics.families[familyName]

depository = family.getTrainingDepository()

entries = depository.entries.values()
entries.sort(key=lambda x: x.index)

print len(entries)

# for entry in entries:
#     display(entry.item)

In [ ]:
family = database.kinetics.families[familyName]

depository = family.getTrainingDepository()

entries = depository.entries.values()
entries.sort(key=lambda x: x.index)

reverse_entries = []

template_dict = {}

for entry in entries:
    for spc in entry.item.reactants + entry.item.products:
        if family.isMoleculeForbidden(spc.molecule[0]):
            print 'Found training reaction which is forbidden: {}'.format(entry)
            if 'forbidden' in template_dict:
                template_dict['forbidden'].append(entry)
            else:
                template_dict['forbidden'] = [entry]
            break

for entry in entries:
    try:
        template = family.getReactionTemplate(entry.item)
    except UndeterminableKineticsError:
        reverse_entries.append(entry)
        continue
    
    template_labels = tuple([item.label for item in template])
    reaction = TemplateReaction(
        reactants = [spc.copy(deep=True) for spc in entry.item.reactants],
        products = [spc.copy(deep=True) for spc in entry.item.products],
        template = template_labels,
        index = entry.index,
    )
    try:
        new_degeneracy = family.calculateDegeneracy(reaction)
    except KineticsError as e:
        print "#{0}".format(entry.index), e.message
    if entry.item.degeneracy != new_degeneracy:
        reaction.degeneracy = new_degeneracy
        entry.item.degeneracy = new_degeneracy  # Change the entry degeneracy
        print 'The degeneracy of #{3} {0} has been changed from {1} to {2}. The training reaction degeneracy is incorrect.'.format(
            entry.label,
            entry.item.degeneracy,
            reaction.degeneracy,
            entry.index,
        )
    else:
        reaction.degeneracy = entry.item.degeneracy
        print 'The degeneracy of #{2} {0} has been set to {1} based on the entry value.'.format(entry.label, entry.item.degeneracy, entry.index)
    reaction.kinetics = entry.data
    reaction.comment = entry.label
    
    if template_labels in template_dict:
        template_dict[template_labels].append(reaction)
    else:
        template_dict[template_labels] = [reaction]

for entry in reverse_entries:
    item = Reaction(reactants=entry.item.products, products=entry.item.reactants)

    try:
        template = family.getReactionTemplate(item)
    except UndeterminableKineticsError:
        print 'Found training reaction which does not fit: {}'.format(entry)
        if 'no_template' in template_dict:
            template_dict['no_template'].append(entry)
        else:
            template_dict['no_template'] = [entry]
        continue
    
    # get reverse kinetics
    temp = Reaction(
        reactants = [spc.copy(deep=True) for spc in entry.item.reactants],
        products = [spc.copy(deep=True) for spc in entry.item.products],
    )
    for spc in itertools.chain(temp.reactants, temp.products):
        spc.thermo = database.thermo.getThermoData(spc, trainingSet=True)
    temp.kinetics = entry.data
    kinetics = temp.generateReverseRateCoefficient()
    
    template_labels = tuple([item.label for item in template])
    reaction = TemplateReaction(
        reactants = [spc.copy(deep=True) for spc in entry.item.products],
        products = [spc.copy(deep=True) for spc in entry.item.reactants],
        template = template_labels,
        index = entry.index,
    )
    try:
        reaction.degeneracy = family.calculateDegeneracy(reaction)
    except KineticsError as e:
        print e.message
    reaction.kinetics = kinetics
    reaction.comment = 'Reverse of ' + entry.label
    
    if template_labels in template_dict:
        template_dict[template_labels].append(reaction)
    else:
        template_dict[template_labels] = [reaction]

In [ ]:
training_path = os.path.join(settings['database.directory'], 'kinetics', 'families', familyName, 'training')
print training_path
family.saveDepository(depository, training_path)

In [ ]:
# html table settings
full = 12
half = full / 2

for template_label, reactions in template_dict.iteritems():
    if template_label == 'forbidden' or template_label =='no_template':
        continue
    if len(reactions) == 1:
        reaction = reactions[0]

        template = database.kinetics.families[familyName].retrieveTemplate(template_label)

        templateSize = len(template)
        # html table currently uses a 12 column setup, so templates with 5 groups will break the table
        assert templateSize < 5

        # check what the current kinetics for this template are
        newKinetics = reaction.kinetics
        oldKinetics = database.kinetics.families[familyName].getKineticsForTemplate(template, degeneracy=reaction.degeneracy)[0]
        # evaluate kinetics
        tlistinv = np.linspace(1000/1500, 1000/300, num=10)
        tlist = 1000 * np.reciprocal(tlistinv)
        newklist = np.log10(np.array([newKinetics.getRateCoefficient(t) for t in tlist]))
        oldklist = np.log10(np.array([oldKinetics.getRateCoefficient(t) for t in tlist]))
        # create plot
        plt.cla()
        plt.plot(tlistinv, oldklist, label='Old')
        plt.plot(tlistinv, newklist, label='Training')
        plt.legend()
        plt.xlabel('1000/T')
        plt.ylabel('log(k)')
        fig = BytesIO()
        plt.savefig(fig, format='png')
        fig.seek(0)
        figdata = b64encode(fig.getvalue())
        fig.close()

        # format output using html
        html = ['<table style="width:100%;table-layout:fixed;"><tr>']
        html += ['<th colspan="{0}" style="color:green">One training reaction was matched to this template!</th>'.format(full)]
        html += ['</tr><tr>']
        html += ['<th colspan="{0}">Matched Template</th>'.format(half)]
        html += ['<td colspan="{0}">{1}</td>'.format(half, [entry.label for entry in template])]
        html += ['</tr><tr>']
        for entry in template:
            html += ['<td colspan="{0}">{1}</td>'.format(full/templateSize, entry.label)]
        html += ['</tr><tr>']
        for entry in template:
            html += ['<td colspan="{0}"><img src="data:image/png;base64,{1}"></td>'.format(full/templateSize, b64encode(entry.item._repr_png_()))]

        html += ['</tr><tr>']
        html += ['<td colspan="{0}">Entry #{1}</td>'.format(1, reaction.index)]
        html += ['<td colspan="{0}"><img src="data:image/png;base64,{1}"></td>'.format(full-1, b64encode(reaction._repr_png_()))]
        html += ['</tr><tr>']
        html += ['<th colspan="{0}">Reactant SMILES</th>'.format(half)]
        html += ['<td colspan="{0}">{1}</td>'.format(half, ' + '.join([reactant.molecule[0].toSMILES() for reactant in reaction.reactants]))]
        html += ['</tr><tr>']
        html += ['<th colspan="{0}">Product SMILES</th>'.format(half)]
        html += ['<td colspan="{0}">{1}</td>'.format(half, ' + '.join([product.molecule[0].toSMILES() for product in reaction.products]))]
        if 'Reverse' in reaction.comment:
            html += ['</tr><tr>']
            html += ['<th colspan="{0}" style="color:red;text-align:center">Note: Training reaction was written in opposite direction from reaction family.'.format(full)]
        html += ['</tr><tr>']
        html += ['<td colspan="{0}"><strong>Old Kinetics:</strong><br>{1}<br><br><strong>Training Rxn:</strong><br>{2}</td>'.format(half, oldKinetics, newKinetics)]
        html += ['<td colspan="{0}"><img src="data:image/png;base64,{1}"></td>'.format(half, figdata)]
        html += ['</tr></table>']

        display(HTML(''.join(html)))
    else:
        template = database.kinetics.families[familyName].retrieveTemplate(template_label)
        templateSize = len(template)

        oldKinetics = database.kinetics.families[familyName].getKineticsForTemplate(template, degeneracy=reaction.degeneracy)[0]
        newKinetics = []
        for i, reaction in enumerate(reactions):
            newKinetics.append(reaction.kinetics)

            if i == 0:        
                html = ['<table style="width:100%;table-layout:fixed;"><tr>']
                html += ['<th colspan="{0}" style="color:blue">Multiple training reactions were matched to this template. You should create new groups.</th>'.format(full)]
                html += ['</tr><tr>']
                html += ['<th colspan="{0}">Matched Template</th>'.format(half)]
                html += ['<td colspan="{0}">{1}</td>'.format(half, [entry.label for entry in template])]
                html += ['</tr><tr>']
                for entry in template:
                    html += ['<td colspan="{0}">{1}</td>'.format(full/templateSize, entry.label)]
                html += ['</tr><tr>']
                for entry in template:
                    html += ['<td colspan="{0}"><img src="data:image/png;base64,{1}"></td>'.format(full/templateSize, b64encode(entry.item._repr_png_()))]
                
            html += ['</tr><tr>']
            html += ['<th colspan="{0}">Match #{1}:</th>'.format(full, i + 1)]
            html += ['</tr><tr>']
            html += ['<td colspan="{0}">Entry #{1}</td>'.format(1, reaction.index)]
            html += ['<td colspan="{0}"><img src="data:image/png;base64,{1}"></td>'.format(full-1, b64encode(reaction._repr_png_()))]
            html += ['</tr><tr>']
            html += ['<th colspan="{0}">Reactant SMILES</th>'.format(half)]
            html += ['<td colspan="{0}">{1}</td>'.format(half, ' + '.join([reactant.molecule[0].toSMILES() for reactant in reaction.reactants]))]
            html += ['</tr><tr>']
            html += ['<th colspan="{0}">Product SMILES</th>'.format(half)]
            html += ['<td colspan="{0}">{1}</td>'.format(half, ' + '.join([product.molecule[0].toSMILES() for product in reaction.products]))]
            if 'Reverse' in reaction.comment:
                html += ['</tr><tr>']
                html += ['<th colspan="{0}" style="color:red;text-align:center">Note: Training reaction was written in opposite direction from reaction family.'.format(full)]

        # evaluate kinetics
        tlistinv = np.linspace(1000/1500, 1000/300, num=10)
        tlist = 1000 * np.reciprocal(tlistinv)
        oldklist = np.log10(np.array([oldKinetics.getRateCoefficient(t) for t in tlist]))
        newklist = []
        for kinetics in newKinetics:
            newklist.append(np.log10(np.array([kinetics.getRateCoefficient(t) for t in tlist])))
        # create plot
        plt.cla()
        plt.plot(tlistinv, oldklist, label='Old')
        for i, k in enumerate(newklist):
            plt.plot(tlistinv, k, label='Match #{0}'.format(i + 1))
        plt.legend()
        plt.xlabel('1000/T')
        plt.ylabel('log(k)')
        fig = BytesIO()
        plt.savefig(fig, format='png')
        fig.seek(0)
        figdata = b64encode(fig.getvalue())
        fig.close()

        html += ['</tr><tr><td colspan="{0}">'.format(half)]
        html += ['<strong>Old Kinetics:</strong><br>{0}'.format(oldKinetics)]
        for i, kinetics in enumerate(newKinetics):
            html += ['<br><br><strong>Match #{0} Kinetics:</strong><br>{1}'.format(i + 1, kinetics)]
        html += ['</td><td colspan="{0}"><img src="data:image/png;base64,{1}"></td>'.format(half, figdata)]

        html += ['</tr></table>']

        display(HTML(''.join(html)))

In [ ]: