Find the coefficients for an unbalanced chemical reaction

Usage example:

balance_rxn("Cu + HNO3 -> Cu(NO3)2 + NO + H2O")

Note that case matters (each element starts with an upper case letter, and any following letters are lower case).


In [1]:
import numpy as np
from scipy.linalg import solve, lu
import string
import re
from fractions import Fraction
from math import gcd
from functools import reduce

In [2]:
def flatten_formula(compound_str):
    """Given a compound_str like:
         "Cu(NO3)2"
       Remove the parentheses, returning e.g.:
         "CuNO3NO3"
       This should make it easier to count up the occurrences of each element in compound_str.
    """
    p = re.compile(r"\(\w+\)\d+")
    while True:
        m = p.search(compound_str)
        if not m:
            break
        base, repeats = m.group().split(')')
        expanded = base.replace('(','') * int(repeats)
        compound_str = compound_str.replace(m.group(),expanded)
    return compound_str

In [3]:
def get_element_counts(compound_str):
    """Given a compound_str like:
         "H2O"
       Return a dict with the count of each component element, e.g.:
         {"H":2,"O":1}
    """
    # expand parenthetical parts:
    compound_str = flatten_formula(compound_str)
    # break compound into substrings for each element, e.g. "H2O" -> ["H2", "O"]:
    elements = re.findall(r"[A-Z][a-z]*[0-9]*", compound_str)
    # count up the number of atoms of each element:
    counts = {}
    for element in elements:
        e = element.rstrip(string.digits)
        count = element[len(e):]
        count = int(count) if len(count)>0 else 1
        if e in counts:  # e.g. if compound was "HC2H2O2", elements will contain H twice
            counts[e] += count
        else:
            counts[e] = count
    return counts

In [4]:
def get_react_prod_from_rxn(reaction_str):
    """Given a reaction_str like:
         "H2 + O2 -> H2O"
       Return the reactants and products as separate lists, e.g.:
         ['H2', 'O2'], ['H2O']
    """
    reactants, products = reaction_str.split("->")
    reactants = [c.strip() for c in reactants.split("+")]
    products  = [c.strip() for c in products.split("+")]
    return reactants, products

In [5]:
def scale_coefficients(coeffs):
    """Take a numpy array of coefficients (which may be floats, less than 1, etc.), and
    return a corresponding array of (minimal) integers."""
    coeffs = coeffs.copy()
    for c in coeffs:
        coeffs *= Fraction(c).limit_denominator().denominator
    coeffs = np.rint(coeffs).astype(int)
    coeffs //= reduce(gcd, coeffs)
    return coeffs

In [6]:
def find_coefficients(reaction_str):
    """Given a reaction_str like:
         "H2 + O2 -> H2O"
       Return the balanced coefficients for the terms, e.g.:
         [2, 1, 2]
    """
    # Parse out the individual terms of the reaction:
    reactants, products = get_react_prod_from_rxn(reaction_str)
    compounds = reactants + products  # e.g. ["H2", "O2", "H2O"]
    # Identify all elements involved:
    all_elements = []
    for c in compounds:
        all_elements += re.findall(r"[A-Z][a-z]*", c)
    all_elements = list(set(all_elements))  # e.g. ["H", "O"]
    # Set up the matrix A using the given reaction:
    num_coeffs = len(compounds)  # how many columns in A, i.e. how many values we need to find
    num_elements = len(all_elements)  # how many rows in A, i.e. how many equations we can write
    A = np.zeros((num_elements,num_coeffs))
    compound_counts = [get_element_counts(c) for c in compounds]
    for Arow, element in enumerate(all_elements):
        for Acol, compound in enumerate(compound_counts):
            if element in compound:
                Asign = 1 if Acol < len(reactants) else -1
                A[Arow][Acol] = Asign * compound[element]
    # Make A square (i.e. solvable) by eliminating redundant equations and/or assigning
    # a value of 1 to a single free variable:
    _,_,u = lu(A)
    A = u[~np.all(u==0,axis=1)]  # get linearly independent equations only
    if len(A) < num_coeffs-1 or len(A) > num_coeffs:
        # we generated conflicting equations or not enough equations
        raise ValueError("Unable to balance the given reaction.")
    b = np.zeros((len(A),1))
    if len(A) < num_coeffs:
        A = np.concatenate([A,np.zeros((1,num_coeffs))])
        A[-1,-1] = 1
        b = np.append(b,[1])
        # TODO: make sure the last column (i.e. coefficient) is always the free variable after LU
    # Solve Ax=b, and return the (scaled) answer:
    coeffs = solve(a=A, b=b)
    coeffs = scale_coefficients(coeffs)
    return coeffs
    # TODO: split up this function more

In [7]:
def balance_rxn(reaction_str):
    """Given a reaction_str like:
         "H2 + O2 -> H2O"
       Print the balanced reaction, e.g.:
         "2 H2 + 1 O2 -> 2 H2O"
    """
    coeffs = find_coefficients(reaction_str)
    reactants, products = get_react_prod_from_rxn(reaction_str)
    solved_reactants = ["%d %s" % (coeffs[ri],                r) for ri,r in enumerate(reactants)]
    solved_products  = ["%d %s" % (coeffs[pi+len(reactants)], p) for pi,p in enumerate(products)]
    print(" + ".join(solved_reactants) + " -> " + " + ".join(solved_products))

Test it


In [8]:
balance_rxn("H2 + O2 -> H2O")


2 H2 + 1 O2 -> 2 H2O

In [9]:
balance_rxn("C4H10 + O2 -> CO2 + H2O")


2 C4H10 + 13 O2 -> 8 CO2 + 10 H2O

In [10]:
balance_rxn("Cu + HNO3 -> Cu(NO3)2 + NO + H2O")


3 Cu + 8 HNO3 -> 3 Cu(NO3)2 + 2 NO + 4 H2O

In [11]:
balance_rxn("H2O -> H2O")


1 H2O -> 1 H2O

In [12]:
# http://forums.xkcd.com/viewtopic.php?t=113675
balance_rxn("K4Fe(SCN)6 + K2Cr2O7 + H2SO4 -> Fe2(SO4)3 + Cr2(SO4)3 + CO2 + H2O + K2SO4 + KNO3")


6 K4Fe(SCN)6 + 97 K2Cr2O7 + 355 H2SO4 -> 3 Fe2(SO4)3 + 97 Cr2(SO4)3 + 36 CO2 + 355 H2O + 91 K2SO4 + 36 KNO3

TODO

  • make this a package, including tests, README, etc.?
  • OOP example?

In [ ]: