Bruker LibraryEditor Export Parser

Reads .library files exported from Bruker Compass LibraryEditor 4.2 and saves them as JSON.


In [1]:
# Path of the LibraryEditor export that is read by the main script.
source = 'Example.library'
# Path of the JSON file that the collected library is written to.
destination = 'Example.json'

Load modules and define functions.


In [2]:
import re
import codecs
import json

def dpstr2dict(filename, data_points_string, meta_count):
    """Convert a whitespace-separated list of data points into a dict.

    The string is read as alternating tokens "k1 v1 k2 v2 ...": every first
    token is parsed with float() and used as key, every second token is
    parsed with int() and used as value. A key that occurs more than once
    keeps the value of its last occurrence.

    Args:
        filename: Only used to identify the spectrum in the warning message.
        data_points_string: Raw text holding the data points.
        meta_count: Expected number of key/value pairs; anything int()
            accepts (the caller passes the 'Num Peaks' header value).

    Returns:
        A dict mapping float keys to int values, or the unchanged
        ``data_points_string`` when the number of tokens does not match
        ``meta_count`` (a warning is printed in that case).
    """
    points = data_points_string.split()
    # Compare against the doubled count instead of halving len(points):
    # with Python 2 integer division an odd number of tokens was rounded
    # down, passed the check and then raised IndexError while pairing.
    if len(points) != 2 * int(meta_count):
        print("Could not convert string to dict! Data point mismatch in spectrum: " + filename)
        return data_points_string  # preserve original data
    return {
        float(key): int(value)
        for key, value in zip(points[::2], points[1::2])
    }

def readspectrum(filename, filecontent):
    """Parse the text of one spectrum record into a dict.

    Every line containing a ':' is stored as a header entry: the text
    before the first ':' is the key, everything after it is the value
    (both stripped). Lines without ':' are treated as data points and
    handed to dpstr2dict() together with the 'Num Peaks' header value.

    Args:
        filename: Initial value of the 'SpecFile' entry; also passed to
            dpstr2dict() for its warning message.
        filecontent: Text of a single spectrum record.

    Returns:
        A dict with the header entries plus 'Values', which holds the
        result of dpstr2dict(). 'SpecFile' and 'Comment' are always
        present; header lines with the same key overwrite the defaults.

    Raises:
        KeyError: If the record has no 'Num Peaks' line.
    """
    spectrum = {'SpecFile': filename, 'Comment': ''}
    peak_lines = []
    for line in filecontent.splitlines():
        # Split at the first ':' only: 'Date', 'AnalName' and probably
        # comments have multiple ':' in their value.
        key, separator, value = line.partition(':')
        if separator:
            spectrum[key.strip()] = value.strip()
        else:
            # spectra do not contain ":"
            peak_lines.append(line)
    # Join with a line break: splitlines() removed the original ones, and
    # gluing the lines together directly would fuse the last token of one
    # line with the first token of the next.
    values = dpstr2dict(filename, '\n'.join(peak_lines), spectrum['Num Peaks'])
    spectrum['Values'] = values
    return spectrum

Run the main script, which uses the functions above to collect all spectra in a dict of lists of dicts.


In [3]:
# Read the whole export. codecs.open() does not translate line endings, so
# the records can be split at the empty line (CRLF CRLF) between them.
with codecs.open(source, 'r', 'cp1252') as s:
    # Keep every non-blank record instead of blindly dropping the last
    # chunk: the final spectrum is no longer lost when the file does not
    # end with an empty line, and stray blank chunks are skipped.
    spec_list = [spec for spec in re.split('\r\n\r\n', s.read()) if spec.strip()]

# Consecutive spectra sharing the same 'Name' are collected in one list;
# the lists are stored under running numbers starting at 1.
library = {}
previous_name = None

for spec in spec_list:
    data = readspectrum(source, spec)
    if library and data['Name'] == previous_name:
        # Same name as the previous spectrum: extend the newest list.
        library[len(library)].append(data)
    else:
        library[len(library) + 1] = [data]
    previous_name = data['Name']

print('The library contains ' + str(len(spec_list)) + ' spectra in ' + str(len(library)) + ' compounds.')


The library contains 415 spectra in 350 compounds.

Save spectra in JSON file.


In [4]:
# Write the collected library to the destination file as indented JSON
# with sorted keys.
with open(destination, 'w') as json_file:
    json.dump(
        library,
        json_file,
        indent=4,
        sort_keys=True
    )