In [1]:
import array
import os.path as op
import numpy as np
import matplotlib as mpl
import matplotlib.pylab as plt
%matplotlib inline
locate the data-set which should be in the repository
In [2]:
ls FTICR-Files/ESI_pos_Ubiquitin_000006.d
so now we can read it
In [3]:
BASE = 'FTICR-Files/ESI_pos_Ubiquitin_000006.d'
In [4]:
F = open( op.join(BASE, 'fid'), 'rb')
tbuf = F.read()
fid = np.array(array.array('i',tbuf))
F.close()
In [5]:
len(fid)
Out[5]:
In [6]:
fid[1000:1010]
Out[6]:
In [7]:
plt.plot(fid)
Out[7]:
In [8]:
sp = np.fft.fft(fid)
In [9]:
print sp[0]
len(sp)
Out[9]:
In [10]:
# default size of the figures
mpl.rcParams['figure.figsize'] = [10.0,8.0]
# this helps matplotlib to draw the huge vectors we're using (4-8Mpoints)
mpl.rcParams['agg.path.chunksize'] = 100000
In [11]:
plt.plot(sp)
Out[11]:
3 remarks
1 syntax remark
In [12]:
from numpy.fft import fft, rfft
sp = rfft(fid)
print sp[0]
len(sp)
Out[12]:
In [13]:
plt.plot(abs(sp)) # absolute value is modulus
Out[13]:
In [14]:
# let's zoom in
plt.plot(abs(sp[715000:725000]))
Out[14]:
In [15]:
# without the abs() we see the phase rotating
plt.plot(sp[720000:722000])
plt.figure()
# we rotate by 90° the phase
plt.plot(1j*sp[720000:722000])
Out[15]:
In [16]:
plt.plot(np.hamming(100))
Out[16]:
there is also blackman
hanning
bartlett
...
In [17]:
np.blackman.func_name
Out[17]:
In [18]:
for apod in (np.blackman, np.hamming, np.hanning, np.bartlett):
plt.plot(apod(100),label=apod.func_name)
plt.legend()
Out[18]:
In [19]:
sp2 = rfft( np.hamming(len(fid))*fid )
In [20]:
plt.plot(abs(sp2))
plt.figure()
plt.plot(abs(sp2[720000:722000]))
Out[20]:
Notice how lineshape is improved, and the isotopic pattern intensities restored
We can superimpose both processing, zooming on a line you can see that FWHM is sligthly worse when apodized.
Theory is 1.5 for hamming window, but does not really show here.
In [21]:
plt.plot(abs(sp)/max(sp))
plt.plot(abs(sp2)/max(sp2))
plt.xlim(721050,721080)
Out[21]:
In [22]:
n = len(fid)
fidzf = np.zeros(2*n)
In [23]:
fidzf[:n] = fid[:]
fidzf[:n] *= np.hamming(n)
spzf = abs(rfft( fidzf ))
plt.plot(abs(spzf[2*720000:2*722000]))
Out[23]:
lineshape and isotopic pattern intensities further improved !
In [24]:
plt.subplot(211)
plt.plot(abs(sp2[721100:721200]), label='direct')
plt.legend()
plt.subplot(212)
plt.plot(abs(spzf[2*721100:2*721200]), label='with zerofilling')
plt.legend()
Out[24]:
In FT-ICR, the frequency $f$ depends on the magnetic field $B_o$ the charge $z$ and the mass $m$ : $$ f = \frac{B_o z}{m} \quad \Rightarrow \quad m/z = \frac{B_o}{f}$$ We are going to use a sligthly extended equation which takes cares of experimental imperfections: $$ m/z = \frac{M_1}{M_2+f}$$ where $M_1$ and $M_2$ are constants determined precisely by a calibration procedure
FT-ICR parameters are stored in the .method files
In [25]:
ls 'FTICR-Files/ESI_pos_Ubiquitin_000006.d/ESI_pos_150_3000.m/apexAcquisition.method'
In [26]:
from xml.dom import minidom
def read_param(filename):
"""
Open the given file and retrieve all parameters from apexAcquisition.method
NC is written when no value for value is found
structure : <param name = "AMS_ActiveExclusion"><value>0</value></param>
read_param returns values in a dictionnary
"""
xmldoc = minidom.parse(filename)
x = xmldoc.documentElement
pp = {}
children = x.childNodes
for child in children:
if (child.nodeName == 'paramlist'):
params = child.childNodes
for param in params:
if (param.nodeName == 'param'):
k = str(param.getAttribute('name'))
for element in param.childNodes:
if element.nodeName == "value":
try:
v = str(element.firstChild.toxml())
#print v
except:
v = "NC"
pp[k] = v
return pp
In [27]:
pfile = op.join(BASE, 'ESI_pos_150_3000.m', 'apexAcquisition.method')
param = read_param(pfile)
print (param['TD'])
In [28]:
# print the 10 first entries
print(param.keys()[:10])
In [29]:
# we find the parameters in the parameter file
sw = float(param['SW_h_Broadband'])
ml1 = float(param['ML1'])
ml2 = float(param['ML2'])
print('Spectral width: {}'.format(sw) )
print('constant M_1: {}'.format(ml1))
print('constant M_2: {}'.format(ml2))
In [30]:
faxis = np.linspace(0, sw, len(spzf)) # the freq axis from 0 to sw
In [31]:
mzaxis = ml1/(ml2+faxis) # and the mzaxis
In [32]:
plt.plot(mzaxis, spzf)
plt.xlim(856.5, 858.5)
plt.xlabel("$m/z$")
Out[32]:
we can zoom on the monoisotopic peak, and try to determine precisely its value.
We know the theorical mass is 856.969496 (for the $z=10$ state).
In [33]:
plt.plot(mzaxis, spzf)
plt.xlim(856.9, 857.0)
plt.ylim(0,6E7)
plt.xlabel("$m/z$")
Out[33]:
In [34]:
# these functions convert back and forth from index to m/z
def itomz(val, N):
"""transform index to m/z for N points,
using current ml1 ml2 and sw
"""
f = sw*val/N
return ml1/(ml2+f)
def mztoi(m, N):
"""transform m/z to index for N points,
using current ml1 ml2 and sw
"""
f = ml1/m - ml2
return N*f/sw
theo = 856.9694962104804
def ppm(theorical, measured):
return 1E6*(measured-theorical)/measured
we can compute the vector coordinates of the previous zoom window
In [35]:
start = int( mztoi(857.0, len(spzf)))
end = int( mztoi(856.9, len(spzf)))
print("start: {} - end: {}".format(start,end))
plt.plot(spzf[start:end])
top = spzf[start:end].argmax() + start
meas1 = itomz(top,len(spzf))
print("maximum is at: {}, for m/z: {}".format(top,meas1))
print("For an error of {:.3f} ppm".format(ppm(theo, meas1)))
In [36]:
# peak barycenter
bary = 0.0
s = 0
for i in range(-3, +4):
bary += i*spzf[i+top]
s += spzf[i+top]
mbary = itomz(bary/s+top, len(spzf))
print ("peak barycenter is at {}".format(mbary))
print("For an error of {:.3f} ppm".format(ppm(theo, mbary)))
In [37]:
import glob
def read_fticr(folder):
"""
load and process the data Solarix Apex FTICR file found in folder
uses the calibration from the parameter file
eg:
spectrum,axis = read_fticr('FTICR/Files/bruker ubiquitin file/ESI_pos_Ubiquitin_000006.d')
"""
# find and load parameters
L = glob.glob(op.join(folder,"*","apexAcquisition.method"))
if len(L)>1:
raise Exception( "You have more than 1 apexAcquisition.method file in the %s folder, using the first one"%folder )
elif len(L) == 0:
raise Exception( "You don't have any apexAcquisition.method file in the %s folder, please double check the path"%folder )
param = read_param(L[0])
# load data
n = int(param['TD'])
fidzf = np.zeros(2*n)
with open( op.join(BASE, 'fid'), 'r') as F: # 'with' a better way of reading a file
tbuf = F.read(4*int(param['TD']))
fidzf[:n] = np.array(array.array('i',tbuf)) # [:] is less memory intensive
# process
fidzf[:n] *= np.hamming(n)
spectrum = abs( rfft( fidzf ) )
# calibrate
sw = float(param['SW_h_Broadband'])
ml1 = float(param['ML1'])
ml2 = float(param['ML2'])
faxis = np.linspace(0, sw, len(spectrum)) # the freq axis from 0 to sw
mzaxis = ml1/(ml2+faxis) # and the mzaxis
return spectrum, mzaxis
In [38]:
spectrum,axis = read_fticr('FTICR-Files/ESI_pos_Ubiquitin_000006.d')
In [39]:
# %matplotlib
plt.plot(axis, spectrum)
plt.xlabel("$m/z$")
plt.xlim(200,2500)
Out[39]:
In [40]:
plt.plot(axis, spectrum)
plt.xlabel("$m/z$")
plt.xlim(650,1300)
plt.title("Zooming in")
Out[40]:
In [41]:
plt.plot(axis, spectrum)
plt.xlabel("$m/z$")
plt.xlim(855,865)
plt.title("Zooming further")
Out[41]:
SPIKE is a complete software suite we're building to do this, and much more
It is distributed here : https://bitbucket.org/delsuc/spike
However, the program is not fully operational yet, stay tuned !
In [42]:
import sys
sys.path.append('/Users/mad/NPKV2/') # adapt this to your own set-up
import spike
from spike.File import Solarix
%matplotlib inline
In [43]:
File = 'FTICR-Files/ESI_pos_Ubiquitin_000006.d'
dd = Solarix.Import_1D(File)
In [44]:
dd.unit = "sec"
dd.display()
Out[44]:
In [45]:
dd.hamming().zf(2).rfft().modulus()
dd.unit = "m/z"
dd.display(zoom=(500,2000))
Out[45]:
In [46]:
# wrong calibration !
dd.axis1.calibA = float(dd.params["ML1"])
dd.axis1.calibB = float(dd.params["ML2"])
dd.display(zoom=(500,2000))
Out[46]:
In [47]:
dd.pp(threshold=1E7)
print ("detected %d peaks"%len(dd.peaks))
In [48]:
dd.display(zoom=(856.5, 858.5))
dd.display_peaks(zoom=(856.5, 858.5))
In [49]:
p = dd.peaks[603]
In [50]:
p.report()
Out[50]:
In [51]:
p.label = "Peak 103"
p.report() # in index
p.report(f=dd.axis1.itomz) # provides in m/z
# string is :
# id, label, m/z, intensity, width (0 if not determined)
Out[51]:
In [52]:
dd.centroid()
for p in dd.peaks:
p.label = "%.5f"%(dd.axis1.itomz(p.pos))
In [53]:
# this switches to a different display mode
dd.display(zoom=(856.5, 858.5))
dd.display_peaks(zoom=(856.5, 858.5), peak_label=True)