In [1]:
import matplotlib.pyplot as plt
import sys
%matplotlib inline
sys.path.append('/Users/palmer/Documents/python_codebase/')
from pyImagingMSpec.hdf5.IMSdataset import IMSdataset
from pyMS import centroid_detection
from pyImzML import ImzMLParser
import h5py
import datetime
import numpy as np
from IPython.display import display, clear_output
import scipy.signal as signal
In [ ]:
In [2]:
### Provide file names
filename_imzML = '/Volumes/alexandr/shared/Luca/20150824_ADP_LR_Finger_Print_FullScan_DHBsub_80x80_15um.imzML'
filename_IMShdf5 = '/Volumes/alexandr/shared/Luca/20150824_ADP_LR_Finger_Print_FullScan_DHBsub_80x80_15um.hdf5'
centroids = True # <- lazy hack by Andy
### Provide some file info
# note, there is support for multiple sample types within a file but the code must be updated.
sample_name = 'finger print'
sample_source = 'Rappez'
sample_preparation = 'direct on glass'
maldi_matrix = 'DHB'
matrix_application = 'sublimation'
### Open files
f_in = ImzMLParser.ImzMLParser(filename_imzML)
f_out = h5py.File(filename_IMShdf5,'w') #create file, truncate if exists
### make root groups for output data
spectral_data = f_out.create_group('spectral_data')
spatial_data = f_out.create_group('spatial_data')
shared_data = f_out.create_group('shared_data')
### populate common variables - can hardcode as I know what these are for h5 data
# parameters
instrument_parameters_1 = shared_data.create_group('instrument_parameters/001')
instrument_parameters_1.attrs['instrument name'] = 'QExactivePlus'
instrument_parameters_1.attrs['analyser type'] = 'Qrbitrap'
instrument_parameters_1.attrs['data conversion'] = 'imzML->hdf5:'+str(datetime.datetime.now())
# m/z axis
#will centroid data so this doesn't exist
# ROIs
#todo - determine and propagate all ROIs
roi_1 = shared_data.create_group('regions_of_interest/001')
roi_1.attrs['name'] = 'root region'
roi_1.attrs['parent'] = ''
# Sample
#todo - not write empty properties
sample_1 = shared_data.create_group('samples/001')
sample_1.attrs['name'] = sample_name
sample_1.attrs['source'] = sample_source
sample_1.attrs['preparation'] = sample_preparation
sample_1.attrs['MALDI matrix'] = maldi_matrix
sample_1.attrs['MALDI matrix application'] = matrix_application
### write spectra
n=0;
for i,coords in enumerate(f_in.coordinates):
## rename as I'm using old code :S
spot = i
key=str(i)
## make new spectrum
mzs,ints = f_in.getspectrum(i)
if centroids == True:
mzs_list,intensity_list = mzs,ints
else:
ints=signal.savgol_filter(ints, 5, 2)
mzs_list,intensity_list,indices_list=centroid_detection.gradient(np.asarray(mzs),np.asarray(ints),max_output=-1,weighted_bins=3)
if not all([m>0 for m in intensity_list]):
raise ValueError('whoa, wtf?')
# add intensities
this_spectrum = spectral_data.create_group(key)
this_intensities = this_spectrum.create_dataset('centroid_intensities',data=np.float32(intensity_list),compression="gzip",compression_opts=9)
# add coordinates
if len(coords)==2:
coords = (coords[0],coords[1],0)
this_coordiantes = this_spectrum.create_dataset('coordinates',data=(coords[0],coords[1],coords[2]))
## link to shared parameters
# mzs
this_mzs = this_spectrum.create_dataset('centroid_mzs',data=np.float64(mzs_list),compression="gzip",compression_opts=9)
###
# ROI
this_spectrum['ROIs/001'] = h5py.SoftLink('/shared_data/regions_of_interest/001')
# Sample
this_spectrum['samples/001'] = h5py.SoftLink('/shared_data/samples/001')
# Instrument config
this_spectrum['instrument_parameters'] = h5py.SoftLink('/shared_data/instrument_parameters/001')
n+=1
if np.mod(n,10)==0:
clear_output(wait=True)
print '{:3.2f}\% complete\r'.format(round(10000.*n/len(f_in.coordinates))/100,end="\r")
sys.stdout.flush()
f_out.close()
print 'fin'
In [ ]:
In [ ]: