In [12]:
from thermo import *


# Rewrite the identifiers table line by line, passing every record through
# fix_line. NOTE(review): fix_line is defined in a later cell of this notebook,
# so that cell has to be executed before this one.
# The result is written to a sibling file named like the input plus a trailing
# '2'; the input file itself is only read.
# NOTE(review): the path below is absolute and machine-specific -- adjust it
# before running anywhere else.
file_name = '/home/caleb/Documents/University/CHE3123/thermo/thermo/Identifiers/chemical identifiers.tsv'
# `with` closes (and flushes) both handles even if fix_line raises part-way
# through the file.
with open(file_name) as f, open(file_name+'2', 'w') as f2:
    for line in f:
        # The column handling inside fix_line is effectively the documentation
        # for the format of this file.
        f2.write(fix_line(line))

In [11]:
from rdkit.Chem import *
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem.rdMolDescriptors import CalcMolFormula

def fix_line(line):
    """Return `line` with its formula column recomputed for isotope-labelled SMILES.

    `line` is one tab-separated record whose first nine columns are, in order:
    pubchemid, CAS, formula, MW, smiles, InChI, InChI_key, iupac_name,
    common_name. Any further columns are carried through untouched.

    The SMILES column is parsed with RDKit. If any atom reports a nonzero
    ``GetIsotope()`` value, the formula column is replaced by
    ``CalcMolFormula(mol, True, True)`` and the rebuilt record, terminated by
    a single newline, is returned.

    In every other case the original `line` is returned unchanged: fewer than
    nine columns (e.g. a blank line), a SMILES that RDKit cannot parse, no
    isotope labels, or an error raised while inspecting the molecule.
    """
    values = line.rstrip('\n').split('\t')
    if len(values) < 9:
        # Blank or truncated record: nothing to fix, and no reason to abort
        # the whole file rewrite over it.
        return line
    smiles = values[4]

    try:
        m = Chem.MolFromSmiles(smiles)
        if m is None:
            # RDKit signals an unparsable SMILES by returning None.
            return line
        if not any(atom.GetIsotope() for atom in m.GetAtoms()):
            return line
        # Positional flags are presumably separateIsotopes=True and
        # abbreviateHIsotopes=True -- TODO confirm against the RDKit docs.
        formula = CalcMolFormula(m, True, True)
    except Exception:
        # Deliberately best-effort: a record RDKit chokes on is passed through
        # unmodified. Unlike a bare `except`, this lets KeyboardInterrupt and
        # SystemExit stop a long-running rewrite.
        return line

    values[2] = formula
    # Joining the full column list keeps the column count exactly as it was
    # (no stray trailing tab when the record has only the nine named columns).
    return '\t'.join(values) + '\n'

In [ ]: