by Megat Harun Al Rashid bin Megat Ahmad
last updated: April 14, 2016
In general, Scientific Python package or better known as SciPy is a Python-based software for mathematics, science, and engineering. It includes some of the libraries that have been discussed before, i.e. **sympy**, **numpy**, **matplotlib** and **pandas**, as well as a library named **scipy**.
The scopes and applications of SciPy libraries are quite large to be covered in just one tutorial. Henceforth, only examples on specific subjects those the author has experienced doing the computations will be presented here.
Here we will see the fitting of a curve or actually a peak/peaks of an X-ray diffraction data of a composite material (the data are courtesy of Mr. Mohd Reusmazran, a colleague of mine).
Here we will use the functions available in **pandas** library as well as **scipy**, **numpy** and **matplotlib**.
In [1]:
# Importing function from pandas
from pandas import read_excel
# Reading excel file and extract data from specific sheet
from_excel = read_excel('Tutorial9/XRD 30_50PLLA2.xlsx','PLLA 50kGy')
In [2]:
# Checking the content
from_excel.head()
Out[2]:
In [3]:
# Extracting the array values
PLLA50kgy = from_excel.values
PLLA50kgy
Out[3]:
In [4]:
# Checking by selecting columns of interest
PLLA50kgy[1:,1:3]
Out[4]:
In [5]:
# Transposing the selected columns of interest
XRDPLLA50kgy = PLLA50kgy[1:,1:3].T
XRDPLLA50kgy
Out[5]:
In [6]:
# It is now easy to simply plot and see the diffraction pattern
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
plt.plot(XRDPLLA50kgy[0],XRDPLLA50kgy[1])
plt.xlabel(r'2$\theta$')
plt.ylabel('counts')
plt.show()
In [7]:
# Looking into peak of interest (from 5 to 30 of x-axis)
plt.figure(figsize=(4, 3))
# Checking the amount of data points
plt.plot(XRDPLLA50kgy[0],XRDPLLA50kgy[1], 'x')
#plt.plot(XRDPLLA50kgy[0],XRDPLLA50kgy[1])
plt.xlim(5,28.1167)
plt.ylim(0,2000)
plt.xlabel(r'2$\theta$')
plt.ylabel('counts')
plt.show()
The background fitting is necessary in order to fit this peak with a gaussian function. Thus the fitting will include an equation the sum the gaussian function and background. It is better make a guess first on the parameters values for this equation before performing fitting optimization.
In [8]:
# Initial plot and fitting parameters guessing (Gaussian)
# Select the plot range of interest
sudutE = XRDPLLA50kgy[0][5:700]
keamatanE = XRDPLLA50kgy[1][5:700]
a1 = 870; a2 = 1450;
b1 = 16; b2 = -9.5;
c1 = 3.5; c2 = 12;
# Manual fitting with Gaussian
x1_val = np.linspace(5,28,695) # Creating x-values
y2_val = a2*np.exp(-(x1_val-b2)**2/(2*c2**2))+400 # Gaussian background
y1_val = a1*np.exp(-(x1_val-b1)**2/(2*c1**2)) # Gaussian peak
plt.figure(figsize=(14, 3))
plt.plot(sudutE,keamatanE,'g') # Plot data
plt.plot(x1_val,y2_val,'b') # Plot background
plt.plot(x1_val,y1_val,'r') # Plot peak
plt.plot(x1_val,y1_val+y2_val,'r--') # Plot peak plus background
plt.title('Manual')
plt.ylabel('Intensity')
plt.xlabel(r'2$\theta$')
plt.xlim
Out[8]:
In [9]:
# Fitting curve using scipy curve_fit module
import numpy as np
from scipy.optimize import curve_fit
def fitGaussian(xvar,af,bf,cf,df,ae,be,ce):
return af*np.exp(-(xvar-bf)**2/(2*cf**2))+ae*np.exp(-(xvar-be)**2/(2*ce**2))+df
# it return best fit values for parameters
# pass the function name, x data, y data, initial value of parameters (in list)
popt, pcov = curve_fit(fitGaussian, sudutE, keamatanE,\
[870,16,3.5,400,1450,-9.5,12],sigma =keamatanE)
print popt # fitting parameters obtained
print pcov
In [10]:
# Plotting the best fit using the fitting parameters
y_val = popt[0]*np.exp(-(sudutE-popt[1])**2/(2*popt[2]**2))+popt[4]*\
np.exp(-(sudutE-popt[5])**2/(2*popt[6]**2))+popt[3]
plt.plot(sudutE,keamatanE,'g',label = "Data PLLA50kgy")
plt.plot(sudutE,y_val,'r-', ls='--', label="Exp Fit")
plt.title('Data Fitting')
plt.ylabel('Intensity')
plt.xlabel(r'2$\theta$')
plt.legend(loc = 'upper right')
plt.ylim(300,1800)
plt.xlim(5,28.1167)
plt.savefig('Tutorial9/Fitting_PLLA50kgy.jpeg')
It is now easy to obtain the peak properties, for instance, area under the peak (minus the background).
In [11]:
# Area under the peak using trapezoidal method
np.trapz(popt[0]*np.exp(-(sudutE-popt[1])**2/(2*popt[2]**2)), x=sudutE)
Out[11]:
Now let us try to fit all peaks the x-ray diffraction data.
In [12]:
def fitGaussianAll(xvar,af,bf,cf,df,ae,be,ce,ag,bg,cg,\
ag1,bg1,cg1,ag2,bg2,cg2,ag3,bg3,cg3,ag4,bg4,cg4,ag5,bg5,cg5):
return af*np.exp(-(xvar-bf)**2/(2*cf**2))+\
ae*np.exp(-(xvar-be)**2/(2*ce**2))+\
ag*np.exp(-(xvar-bg)**2/(2*cg**2))+\
ag1*np.exp(-(xvar-bg1)**2/(2*cg1**2))+\
ag2*np.exp(-(xvar-bg2)**2/(2*cg2**2))+\
ag3*np.exp(-(xvar-bg3)**2/(2*cg3**2))+\
ag4*np.exp(-(xvar-bg4)**2/(2*cg4**2))+\
ag5*np.exp(-(xvar-bg5)**2/(2*cg5**2))+\
df
a1 = 870; a2 = 1450; a3 = 7300; a4 = 300; a5 = 380; a6 = 300; a7 = 600; a8 = 500
b1 = 16; b2 = -9.5; b3 = 29.5; b4 = 36.1; b5 = 39.5; b6 = 43.3; b7 = 47.7; b8 = 48.7
c1 = 3.5; c2 = 12; c3 = 0.02; c4 = 0.02; c5 = 0.02; c6 = 0.02; c7 = 0.02; c8 = 0.02
df1 = 357.3
# it return best fit values for parameters
# pass the function name, x array, y array, initial value of parameters (in list)
poptAll, pcovAll = curve_fit(fitGaussianAll, XRDPLLA50kgy[0], XRDPLLA50kgy[1],\
[a1,b1,c1,df1,a2,b2,c2,a3,b3,c3,a4,b4,c4,a5,b5,c5,a6,b6,c6,a7,b7,c7,a8,b8,c8],\
sigma =XRDPLLA50kgy[1])
print poptAll
print pcovAll
In [13]:
plt.figure(figsize=(14, 6))
y_back = poptAll[4]*np.exp(-(XRDPLLA50kgy[0]-poptAll[5])**2/(2*poptAll[6]**2))+poptAll[3]
y_p1 = poptAll[0]*np.exp(-(XRDPLLA50kgy[0]-poptAll[1])**2/(2*poptAll[2]**2))
y_p2 = poptAll[7]*np.exp(-(XRDPLLA50kgy[0]-poptAll[8])**2/(2*poptAll[9]**2))
y_p3 = poptAll[10]*np.exp(-(XRDPLLA50kgy[0]-poptAll[11])**2/(2*poptAll[12]**2))
y_p4 = poptAll[13]*np.exp(-(XRDPLLA50kgy[0]-poptAll[14])**2/(2*poptAll[15]**2))
y_p5 = poptAll[16]*np.exp(-(XRDPLLA50kgy[0]-poptAll[17])**2/(2*poptAll[18]**2))
y_p6 = poptAll[19]*np.exp(-(XRDPLLA50kgy[0]-poptAll[20])**2/(2*poptAll[21]**2))
y_p7 = poptAll[22]*np.exp(-(XRDPLLA50kgy[0]-poptAll[23])**2/(2*poptAll[24]**2))
y_all = y_p1+y_p2+y_p3+y_p4+y_p5+y_p6+y_p7+y_back
plt.plot(XRDPLLA50kgy[0],XRDPLLA50kgy[1],'g')
plt.plot(XRDPLLA50kgy[0],y_back,'b')
plt.plot(XRDPLLA50kgy[0],y_p1,'r')
plt.plot(XRDPLLA50kgy[0],y_p2,'r')
plt.plot(XRDPLLA50kgy[0],y_p3,'r')
plt.plot(XRDPLLA50kgy[0],y_p4,'r')
plt.plot(XRDPLLA50kgy[0],y_p5,'r')
plt.plot(XRDPLLA50kgy[0],y_p6,'r')
plt.plot(XRDPLLA50kgy[0],y_p7,'r')
plt.plot(XRDPLLA50kgy[0],y_all,'r--')
plt.title('Manual')
plt.ylabel('Intensity')
plt.xlabel(r'2$\theta$')
Out[13]:
The optimization routine used above was done according to Levensberg-Marquadt algorithm which is the default method. Other methods that can be used are Trust Region Reflective and Dogbox algorithms.
This image processing example is about the investigation of internal nanostructures of ceramic particles from a Transmission Electron Microscope (TEM) image. This is part of research reported in Mater. Lett., 62(15) (2008) 2339-2342.
Here we will use the functions available in **scipy** library as well as **numpy** and **matplotlib**.
In [14]:
# Importing function from the scipy library
from scipy import misc
# Passing the image file as array to variable nanostr
nanostr = misc.imread('Tutorial9/06S11-13.TIF')
In [15]:
# Showing the image in grayscale
%matplotlib inline
import matplotlib.pyplot as plt
plt.imshow(nanostr, cmap=plt.cm.gray)
Out[15]:
In [16]:
# Checking the shape of the array
nanostr.shape
Out[16]:
The image can be enlarged to inspect the details. The ratio of the pixels (or arrays)can give the exact ratio of the image when enlarging it.
In [17]:
plt.figure(figsize=(10,10*nanostr.shape[0]/nanostr.shape[1]))
plt.imshow(nanostr, cmap=plt.cm.gray)
Out[17]:
In order to remove the label below the image, it is better to know the exact pixel position of the boundary (here this is done for the $y$-axis)
In [18]:
# Checking the array boundary for the label
plt.imshow(nanostr[1000:1050,:50],cmap=plt.cm.gray)
Out[18]:
The $y$-boundary for the label starts at about 1030 (as the image was extracted starting at pixel 1000).
In [19]:
# The image without label
plt.imshow(nanostr[:1030],cmap=plt.cm.gray)
Out[19]:
It is possible to extract the parametric length of a pixel from the scale bar in the label. This can be used later to estimate the size of the nanostructures inside the particles.
In [20]:
# Extracting the scale bar image
plt.imshow(nanostr[1030:,1076:1076+290],cmap=plt.cm.gray)
Out[20]:
In [21]:
nanostr[1030:,1076:1076+290].shape
Out[21]:
In the above the parametric length of a pixel (in nm) is:
In [22]:
pixSiz = 500/290.0
pixSiz, 'nm'
Out[22]:
The new area of the selected image without label can be passed to a new array.
In [23]:
nstr = nanostr[:1030]
Certain parts of the image, for instance, can be selected for further inspection.
In [24]:
# First, checking the shape of the new image
nstr.shape
Out[24]:
In [25]:
# Selecting new area of interest
# and tinkering with values of its pixels
import numpy as np
square = np.zeros(nstr.shape)
square[275:641,400:776]=100
In [26]:
# Plotting the image by highlighting the new area of interest
plt.plot([400,400],[275,641],'b',linewidth=1)
plt.plot([776,776],[275,641],'b',linewidth=1)
plt.plot([400,776],[275,275],'b',linewidth=1)
plt.plot([400,776],[641,641],'b',linewidth=1)
plt.imshow(nstr+square,cmap=plt.cm.gray)
Out[26]:
Applying a false color analysis of the image provide a rich and detail information on the nanostructures of the particle.
In [27]:
plt.imshow(np.log(nstr[275:641,400:776]),cmap=plt.cm.nipy_spectral_r)
plt.colorbar()
Out[27]:
The pixel values are in log scale thus the colorbar indicates that the values of interest is in the range of 4.3 to 5.2.
Sometimes a contour image can provide further information on the region of interest. The example below shows the contour image of the sample with modified pixel intensity of the original image and indicates the next selected image area as one that can provide details on the nanostructures.
In [28]:
plt.imshow((nstr[275:641,400:776]*0)+100, cmap=plt.cm.gray)
plt.contour(nstr[275:641,400:776])
plt.colorbar()
Out[28]:
Based on the contour plot above, an area is selected for further investigation.
In [29]:
# Selecting different region with different false color scheme
plt.plot([100,100],[1,75],'r',linewidth=2)
plt.plot([200,200],[1,75],'r',linewidth=2)
plt.plot([100,200],[1,1],'r',linewidth=2)
plt.plot([100,200],[75,75],'r',linewidth=2)
plt.imshow(np.log(nstr[275:641,400:776]), cmap=plt.cm.BrBG, vmin=4.3, vmax=5.2)
plt.colorbar()
Out[29]:
Displaying the new selected image:
In [30]:
plt.imshow(np.log(nstr[275:641,400:776][0:75,100:200]),cmap=plt.cm.gist_heat)
Out[30]:
In [31]:
# An image of a bar showing the measurement of the size
plt.plot([9.4,11.9],[9.6,12.5],'g',linewidth=7)
plt.imshow(np.log(nstr[275:641,400:776][0:75,100:200][50:70,20:40]),cmap=plt.cm.PuOr, \
interpolation='bicubic')
Out[31]:
Approximating the size of the nanostructures using value from scale bar in the original image.
In [32]:
print "The size of the nanostructures is %.2f nm." % \
(np.sqrt((11.9-9.4)**2 + (12.5-9.6)**2)*pixSiz)
In [33]:
from scipy import ndimage
In [34]:
im=nstr[275:641,400:776]
im = ndimage.gaussian_filter(im, 1)
sx = ndimage.sobel(im, axis=0, mode='constant')
sy = ndimage.sobel(im, axis=1, mode='constant')
sob = np.hypot(sx, sy)
plt.figure(figsize=(16, 5))
plt.subplot(141)
plt.imshow(im, cmap=plt.cm.gray)
plt.axis('off')
plt.title('square', fontsize=20)
plt.subplot(142)
plt.imshow(sx)
plt.axis('off')
plt.title('Sobel (x direction)', fontsize=20)
plt.subplot(143)
plt.imshow(sob)
plt.axis('off')
plt.title('Sobel filter', fontsize=20)
im = im + 0.07*np.random.random(im.shape)
sx = ndimage.sobel(im, axis=0, mode='constant')
sy = ndimage.sobel(im, axis=1, mode='constant')
sob = np.hypot(sx, sy)
plt.subplot(144)
plt.imshow(sob)
plt.axis('off')
plt.title('Sobel for noisy image', fontsize=20)
plt.subplots_adjust(wspace=0.02, hspace=0.02, top=1, bottom=0, left=0, right=0.9)
plt.show()
In [35]:
ext_nstr = (nstr[275:641,400:776]<225)*(nstr[275:641,400:776]>100)*nstr[275:641,400:776]
In [36]:
plt.imshow(ext_nstr)
Out[36]: