Read spectra files exported from the Bruker Spectra Library. All spectra files must end with ".spectrum" and be located in a single folder, with only one spectrum per file.
Please specify the folder containing the *.spectrum files and the name of the results file:
In [1]:
# Folder that holds the exported *.spectrum files.
# Raw string: backslashes in a Windows path must not be read as escape
# sequences (e.g. a sub-folder starting with "n" or "t" would otherwise
# silently turn into a newline or tab).
folder = r'D:\data\Libraries\Example_Xpec'
# Name of the JSON results file; it is created inside `folder`.
archive = 'all_spectra.json'
Load modules and define functions.
In [2]:
import sys
import os.path
import codecs
import json
def dpstr2dict(filename, data_points_string, meta_count):
    """Convert a whitespace-separated data point string into a dict.

    The string is read as alternating tokens "x1 y1 x2 y2 ...". Every first
    token of a pair is converted with float() and used as key, every second
    token is converted with int() and used as value.

    Args:
        filename: Name of the spectrum; only used in the warning message.
        data_points_string: Raw text holding the data points.
        meta_count: Expected number of data points (anything int() accepts).

    Returns:
        A dict mapping float keys to int values, or the unmodified
        data_points_string when the number of complete pairs found does not
        equal meta_count.

    Raises:
        ValueError: If meta_count or one of the tokens cannot be converted.
    """
    # TODO: maybe use numpy ndarray instead of a dict
    points = data_points_string.strip().split()
    count = len(points)
    expected = int(meta_count)
    # An odd number of tokens can never form complete pairs. Checking the
    # parity explicitly (and using floor division) keeps the test correct on
    # both Python 2 and Python 3 and avoids an IndexError in the loop below.
    if count % 2 != 0 or count // 2 != expected:
        print("Could not convert string to dict! Data point mismatch in spectrum: " + filename)
        return data_points_string  # preserve original data
    return {float(points[i]): int(points[i + 1]) for i in range(0, count, 2)}
def readspectrum(filename, filecontent):
    """Parse the text of one spectrum file into a dict.

    Every line containing a ':' is treated as a "key: value" header; the text
    before the first ':' becomes the key and everything after it the value
    (both stripped), so values that themselves contain ':' stay intact.
    Lines without ':' are collected as data point text and converted with
    dpstr2dict(), using the 'Num Peaks' header as the expected point count.

    Args:
        filename: Name stored under 'SpecFile' and used in warnings.
        filecontent: Complete text of the spectrum file.

    Returns:
        A dict with 'SpecFile', one entry per header line, and 'Values'
        holding whatever dpstr2dict() returned.

    Raises:
        KeyError: If the content has no 'Num Peaks' header.
    """
    spectrum = {'SpecFile': filename}
    data_lines = []
    for line in filecontent.splitlines():
        key, colon, value = line.partition(':')
        if colon:
            spectrum[key.strip()] = value.strip()
        else:
            # data point lines do not contain ":"
            data_lines.append(line)
    # Join with a separator: splitlines() drops the line breaks, so plain
    # concatenation would fuse the last token of one line with the first
    # token of the next.
    values = '\n'.join(data_lines)
    spectrum['Values'] = dpstr2dict(filename, values, spectrum['Num Peaks'])
    return spectrum
Run the main script, which uses the functions above to collect all spectra in a list of dicts.
In [3]:
# Collect every *.spectrum file found in `folder` into `library`,
# a list with one dict (as returned by readspectrum) per file.
folder = os.path.abspath(folder)
# isdir() is already False for a missing path, so no extra exists() check.
if not os.path.isdir(folder):
    print("Folder not found!")
    sys.exit(1)  # non-zero status: this is a failure, not a normal exit
library = []
# os.listdir() returns entries in arbitrary order; sorting makes the
# resulting library (and the JSON written from it) reproducible.
for spectrum in sorted(os.listdir(folder)):
    if not spectrum.endswith('.spectrum'):
        print("Skipped file: " + spectrum)
        continue
    spectrum = os.path.join(folder, spectrum)
    # the files are decoded as cp1252
    with codecs.open(spectrum, 'r', 'cp1252') as s:
        data = readspectrum(spectrum, s.read())
    library.append(data)
print('The library contains ' + str(len(library)) + ' spectra.')
Store all spectra in a single JSON file.
In [4]:
# Write the whole library as one pretty-printed JSON document (4-space
# indent, keys sorted) into the folder the spectra were read from.
archive = os.path.join(folder, archive)
with open(archive, 'w') as json_file:
    json.dump(
        library,
        json_file,
        sort_keys=True,
        indent=4,
    )