In [ ]:
>>> Chemical('FeH4O4-2')
<Chemical [fe(oh)3--], T=298.15 K, P=101325 Pa>
# Fe(OH)4-2 is what you need.

In [3]:
def candidates(name):
    """Return alternative spellings of an ion formula to try as identifiers.

    The result always starts with ``name`` itself. Further spellings are
    appended as follows:

    * If ``name`` ends in ``'-'`` and the character just before that dash is
      a digit, the digit is treated as the charge magnitude and moved after
      the dash (``'AgH2O2-'`` -> ``'AgH2O-2'``).
    * If ``name`` contains ``'['`` or ``']'``, the candidates of the
      bracket-free spelling are appended as well.

    Names without any ``'-'`` (or with several) are accepted; only the last
    dash is considered.

    Parameters
    ----------
    name : str
        Ion formula, e.g. ``'SO42-'`` or ``'[Fe(CN)6]3-'``.

    Returns
    -------
    list of str
        Candidate spellings, ``name`` first. May contain duplicates.
    """
    hits = [name]

    # rpartition never fails to unpack: with no dash present `sep` is ''.
    base, sep, end = name.rpartition('-')
    if sep and end == '' and base:
        try:
            charge = int(base[-1])
        except ValueError:
            # Last character is not a digit (e.g. 'Br-'): nothing to move.
            charge = None
        if charge is not None:
            hits.append(base[:-1] + '-' + str(charge))

    if '[' in name or ']' in name:
        hits.extend(candidates(name.replace('[', '').replace(']', '')))
    return hits
            
# candidates('AgH2O2-')
# candidates('[Ru(CN)6]3-')

In [6]:
from thermo import *

# Anion formulas to look up. The same species can appear in more than one
# charge notation (e.g. 'AsO4-3' and 'AsO43-') and with or without square
# brackets (e.g. '[Fe(CN)6]3-' and 'Fe(CN)6-3').
# NOTE(review): 'HcrO4-' looks like a mis-cased duplicate of 'HCrO4-' -- confirm.
ans = ['AgH2O2-', 'AlF63-', 'AlO-', 'AlO2-', 'Al(OH)4-', 'AsO2-', 'AsO4-3', 'AsO43-', 'AuBr2-', 'AuBr4-', 'AuCl4-', 'B4O7-2', 'Be2O32-', 'BeO2-2', 'BF4-', 'BH4-', 'BiCl4-', 'BO2-', 'Br-', 'BrO-', 'BrO3-', 'BrO4-', 'C2O4-2', 'C2O42-', 'C2O4H-', 'Cd(OH)42-', 'CH3CO2-', 'CH3COO-', 'CHOO-', 'Cl-', 'ClO-', 'ClO2-', 'ClO3-', 'ClO4-', 'CN-', 'CO3-2', 'CO32-', 'Cr2O7-2', 'Cr2O72-', 'CrO2-', 'CrO4-2', 'CrO42-', 'Cr(OH)4-', '[Cu(CN)2]-', 'CuI2-', 'F-', '[Fe(CN)6]3-', 'Fe(CN)6-3', '[Fe(CN)6]4-', 'Fe(CN)6-4', 'FeOH2-', 'Fe(OH)3-', 'Fe(OH)4-', 'Fe(OH)4-2', 'Fe(OH)4-3', 'H2AlO3-', 'H2AsO3-', 'H2AsO4-', 'H2BO3-', 'H2GaO-3', 'H2P2-', 'H2P2O7-2', 'H2PO2-', 'H2PO4-', 'H3IO62-', 'H3Mo7O243-', 'HB4O7-', 'HCO2-', 'HCO3-', 'HCrO4-', 'HcrO4-', 'HF2-', 'HFeO4-', 'HO2-', 'HP2O7-3', 'HPbO2-', 'HPO32-', 'HPO3F-', 'HPO4-2', 'HPO42-', 'HS2O4-', 'HSe-', 'HSeO3-', 'HSeO4-', 'HSnO2-', 'HSO3-', 'HSO4-', 'I-', 'I3-', 'In(OH)4-', 'IO-', 'IO-3', 'IO3-', 'IO4-', '[IrCl6]2-', '[IrCl6]3-', 'MnO4-', 'MnO4-2', 'MnO42-', 'MoO4-2', 'MoO42-', 'N2O22-', 'N3-', 'Ni(OH)3-', 'NO2-', 'NO3-', 'OCN-', 'OH-', 'P2O7-4', '[PdCl4]2-', '[PdCl6]2-', 'Po-', 'PO4-3', 'PO43-', '[PtCl4]2-', '[PtCl6]2-', 'Re-', 'ReO4-', '[RhCl6]3-', '[Ru(CN)6]3-', '[Ru(CN)6]4-', 'RuO4-', 'RuO42-', 'S-2', 'S2-', 'S2-2', 'S22-', 'S2O3-2', 'S2O32-', 'S2O4-2', 'S2O42-', 'S2O62-', 'S2O8-2', 'S2O82-', 'S4O62-', 'SbO2-', 'SbO3-', 'SCN-', 'Se-2', 'Se2-', 'SeO3-2', 'SeO32-', 'SeO4-2', 'SeO42-', 'SH-', 'SiF6-2', 'SiF62-', 'SiO32-', 'SiO4-4', 'Sn(OH)62-', 'SO3-2', 'SO32-', 'SO4-2', 'SO42-', 'TcO4-', 'Te2-', 'TeO32-', 'TeO4-', 'Ti(OH)2-', 'Ti(OH)5-', 'VO3-', 'VO4-3', 'WO4-2', 'WO42-', 'ZnO22-', 'Zn(OH)42-']

# Try to construct a Chemical for every anion in `ans`, using each spelling
# produced by candidates(); print the anions for which no spelling worked,
# together with their serialized formula.
for an in ans:
    # Brackets are stripped up front, so candidates() only receives the
    # bracket-free spelling here.
    an = an.replace('[', '').replace(']', '')
    worked = False
    for i in candidates(an):
        try:
            Chemical(i)
            worked = True
            break
        except Exception:
            # A failed lookup only means this spelling was not accepted; move
            # on to the next one. Catching Exception instead of using a bare
            # `except:` lets KeyboardInterrupt / SystemExit propagate, so the
            # loop can still be interrupted.
            pass
    # Disabled fallback: accept the anion if its serialized formula is a key
    # of `serialized` (which is not defined in this cell).
#     if not worked:
#         if serialize_formula(an) in serialized:
#             worked = True
    if not worked:
        print(an, serialize_formula(an))


('AuBr2-', 'AuBr2-')
('Be2O32-', 'Be2O32-')
('BiCl4-', 'BiCl4-')
('C2O4H-', 'C2HO4-')
('Cd(OH)42-', 'CdH42O42-')
('CrO2-', 'CrO2-')
('Cu(CN)2-', 'C2CuN2-')
('CuI2-', 'CuI2-')
('H2AlO3-', 'AlH2O3-')
('H2BO3-', 'BH2O3-')
('H2GaO-3', 'GaH2O-3')
('H2P2-', 'H2P2-')
('H3IO62-', 'H3IO62-')
('H3Mo7O243-', 'H3Mo7O243-')
('HB4O7-', 'B4HO7-')
('HcrO4-', 'HcO4-')
('HFeO4-', 'FeHO4-')
('HPbO2-', 'HO2Pb-')
('HSnO2-', 'HO2Sn-')
('In(OH)4-', 'H4InO4-')
('IO-3', 'IO-3')
('Ru(CN)63-', 'C63N63Ru-')
('Ru(CN)64-', 'C64N64Ru-')
('RuO4-', 'O4Ru-')
('RuO42-', 'O42Ru-')
('SbO2-', 'O2Sb-')
('SbO3-', 'O3Sb-')
('Sn(OH)62-', 'H62O62Sn-')
('TeO4-', 'O4Te-')
('Ti(OH)2-', 'H2O2Ti-')
('ZnO22-', 'O22Zn-')
('Zn(OH)42-', 'H42O42Zn-')

In [1]:
from thermo.identifiers import ChemicalMetadataDB
from thermo import *
# Load only the anion table: no elements, no user databases.
db = ChemicalMetadataDB(elements=False, main_db='anion_db.tsv', user_dbs=[])

# Map each entry's serialized formula to its CAS key. If two entries share a
# serialized formula, the one iterated last wins.
serialized = {
    serialize_formula(d.formula): CAS
    for CAS, d in db.CAS_index.items()
}

# Equal counts mean no two entries collapsed onto the same serialized formula.
len(serialized), len(db.CAS_index)


Out[1]:
(157, 157)

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [75]:


In [77]:



Out[77]:
(155, 155)

In [79]:
# serialized

In [ ]: