(c) Copyright 2019 Le Losq et al.
Le Losq, Berry, Kendrick, Neuville, O'Neill, Determination of the oxidation state of iron in Mid-Ocean Ridge basalt glasses by Raman spectroscopy, American Mineralogist
Code tested in Python 3.6. It will not work in Python 2.7 because it uses Python-3-only features, such as argument unpacking in function calls.
Dependencies:
A copy of this code will also be hosted at https://github.com/charlesll/rampy/examples/Iron_MORB/ In the long term, any code improvements will be posted at that address.
In [1]:
# Library import
%matplotlib inline
import scipy
import numpy as np
np.random.seed(seed=42) # fix the random seed for reproducible results
import scipy.optimize as op
import uncertainties # we use the package uncertainties to handle the uncertainties and their correlations
import uncertainties.unumpy as un
from uncertainties import ufloat
import sklearn
from sklearn.metrics import mean_squared_error
from sklearn import model_selection
from sklearn.decomposition import NMF
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import BaggingRegressor
import matplotlib
from matplotlib import pyplot as plt
from matplotlib.gridspec import GridSpec
import matplotlib.gridspec as gridspec
import pandas as pd
import rampy as rp
import cvxpy
import iron # the local module that contains data treatment functions
from pymcr.mcr import McrAR as McrAls
from pymcr.regressors import OLS, NNLS
from pymcr.constraints import ConstraintNonneg, ConstraintNorm
# Global plot styling: figure geometry, fonts, tick/legend sizes, TeX rendering.
matplotlib.rcParams.update({
    'figure.figsize': (3.22, 3.22),
    'font.size': 10,
    'font.family': "sans-serif",
    'font.sans-serif': "Arial",
    'axes.labelsize': 12,
    'axes.titlesize': 12,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'lines.markersize': 8,
    'lines.linewidth': 1.0,
    'legend.fontsize': 9,
    'legend.frameon': True,
    'legend.markerscale': 1.0,
    'legend.handletextpad': 0.5,
    'text.usetex': True,
})
In [2]:
# Load the list of standard glasses and pre-process their Raman spectra
# (cut-off baseline between 850 and 1140 cm-1, done by the local iron module).
liste_standards = pd.read_excel("./MORB_datalist.xlsx", sheet_name="standards")
x, raw_record, bas2_std, x_fit, y_fit_nosmo, y_fit, nb_exp = iron.preparing_data(
    liste_standards, cutoff=np.array([850., 1140.]))

# Sample redox ratio (Mossbauer) and log fO2 of each spectrum.
fe3_standards = liste_standards["Fe3_moss"]
fo2 = liste_standards["logfo2"]

# Five replicate spectra were recorded per sample: average them column-wise.
n_samples = int(nb_exp / 5)
mean_record = np.empty((x.shape[0], n_samples))
mean_y_fit = np.empty((y_fit.shape[0], n_samples))
mean_bas2_std = np.empty((len(x), n_samples))
for counter, i in enumerate(range(0, nb_exp, 5)):
    mean_record[:, counter] = np.mean(raw_record[:, i:i+5], 1)
    mean_y_fit[:, counter] = np.mean(y_fit[:, i:i+5], 1)
    mean_bas2_std[:, counter] = np.mean(bas2_std[:, i:i+5], 1)

# One fO2 / Fe3+ value per sample (first replicate of each group of five).
mean_fo2 = liste_standards["logfo2"].iloc[range(0, nb_exp, 5)].values
mean_fe3 = liste_standards["Fe3_moss"].iloc[range(0, nb_exp, 5)].values
In [3]:
# Figure 1: mean raw spectra of the standards, colour-coded by their
# Mossbauer Fe3+/FeTOT, with the main band positions (A-G) marked.
scale_fig = 1000.  # NOTE(review): scale_fig appears unused in this cell — confirm
fig1 = plt.figure(figsize=(6.73,3.24),dpi=300)
# taken from https://stackoverflow.com/questions/26545897/drawing-a-colorbar-aside-a-line-plot-using-matplotlib?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
# mean_fe: an array of parameters, each of our curves depend on a specific
# value of parameters
# norm is a class which, when called, can normalize data into the
# [0.0, 1.0] interval.
norm = matplotlib.colors.Normalize(
    vmin=np.min(mean_fe3),
    vmax=np.max(mean_fe3))
# choose a colormap
c_m = matplotlib.cm.plasma
# create a ScalarMappable and initialize a data structure
s_m = matplotlib.cm.ScalarMappable(cmap=c_m, norm=norm)
s_m.set_array([])
# plotting spectra
# calling the ScalarMappable that was initialised with c_m and norm
for i in range(mean_fe3.shape[0]):
    plt.plot(x,
             mean_record[:,i],
             color=s_m.to_rgba(mean_fe3[i]))
# we plot the colorbar, using again our
# ScalarMappable
c_bar = plt.colorbar(s_m)
c_bar.set_label(r"Fe$^{3+}$/Fe\textsuperscript{TOT}")
# dashed vertical guides at the band positions (cm-1), annotated A-G below
plt.plot([505,505], [0.,1.5],"--",color="black",alpha=0.7)
plt.plot([570,570], [0.,1.5],"--",color="black",alpha=0.7)
plt.plot([660,660], [0.,1.5],"--",color="black",alpha=0.7)
plt.plot([735,735], [0.,1.5],"--",color="black",alpha=0.7)
plt.plot([804,804], [0.,1.5],"--",color="black",alpha=0.7)
plt.plot([930,930], [0.,2.5],"--",color="black",alpha=0.7)
plt.plot([1005,1005], [0.,2.7],"--",color="black",alpha=0.7)
plt.annotate("A",xy=(505,1.6),color="black",ha="center",xycoords="data")
plt.annotate("B",xy=(570,1.6),color="black",ha="center",xycoords="data")
plt.annotate("C",xy=(660,1.6),color="black",ha="center",xycoords="data")
plt.annotate("D",xy=(735,1.6),color="black",ha="center",xycoords="data")
plt.annotate("E",xy=(804,1.6),color="black",ha="center",xycoords="data")
plt.annotate("F",xy=(930,2.6),color="black",ha="center",xycoords="data")
plt.annotate("G",xy=(1010,2.8),color="black",ha="center",xycoords="data")
plt.xlabel(r"Raman shift, cm\textsuperscript{-1}")
plt.ylabel("Intensity, arbitrary units")
plt.xticks((300,500,700,900,1100,1300))
plt.xlim(300,1300); plt.ylim(-0.02,3.0)
plt.tight_layout()
fig1.savefig("./figures/Figure1.pdf")
In [4]:
# Figure 2: A) example raw spectrum with its cut-off baseline in the
# 850-1140 cm-1 window; B) baseline-subtracted, normalised spectra of all
# standards colour-coded by Fe3+/FeTOT (reuses s_m from the Figure 1 cell).
fig2 = plt.figure(figsize=(6.73,3.24),dpi=300)
plt.subplot(1,2,1)
# sample 12 taken as the example
plt.plot(x,mean_record[:,12],"-",color=s_m.to_rgba(mean_fe3[12]))
plt.plot(x[(x>850)&(x<1140)],mean_bas2_std[(x>850)&(x<1140),12],"r--")
plt.xlim(800,1300)
plt.text(985,1.9,"raw\nspectrum",fontsize=12,rotation=0,color=s_m.to_rgba(mean_fe3[12]),ha="center")
plt.text(950,0.46,"cut-off\nbaseline",fontsize=12,rotation=0,color="red",ha="center")
plt.annotate("A)",xy=(0.07,0.93),xycoords='axes fraction',ha='center',va='center')
plt.xlabel(r"Raman shift, cm$^{-1}$"); plt.ylabel("Intensity, arbitrary units")
plt.subplot(1,2,2)
# plotting spectra
# calling the ScalarMappable that was initialised with c_m and norm
for i in range(mean_fe3.shape[0]):
    plt.plot(x_fit,
             mean_y_fit[:,i],
             color=s_m.to_rgba(mean_fe3[i]))
# having plotted the 11 curves we plot the colorbar, using again our
# ScalarMappable
c_bar = fig2.colorbar(s_m)
c_bar.set_label(r"Fe$^{3+}$/Fe\textsuperscript{TOT}")
plt.xticks([850,900,950,1000,1050,1100,1150])
plt.xlim(830,1150)
plt.ylim(-0.03,1.2)
# vertical guides at bands F (930 cm-1) and G (1005 cm-1)
plt.plot([930,930],[0.,0.96],"k--")
plt.plot([1005,1005],[0.,1.1],"k--")
plt.annotate("F",xy=(930,0.98),color="black",ha="center",xycoords="data")
plt.annotate("G",xy=(1005,1.13),color="black",ha="center",xycoords="data")
plt.annotate("B)",xy=(0.07,0.93),xycoords='axes fraction',ha='center',va='center')
plt.xlabel(r"Raman shift, cm$^{-1}$"); plt.ylabel("Intensity, normalised")
plt.tight_layout()
plt.savefig("./figures/Figure2.pdf")
Smoothing with the Whittaker filter removes a good amount of noise. The lambda parameter is set to 10^3, which is appropriate for the present data.
See in reference list Eilers et al. (2003) for details on the algorithm as well as the documentation of Rampy.
In [5]:
# Compare the unsmoothed and Whittaker-smoothed first spectrum, each
# normalised to unit area (np.trapz) so the two curves overlay directly.
plt.figure(figsize=(6,6),dpi=300)
plt.plot(x_fit, y_fit_nosmo[:,0]/np.trapz(y_fit_nosmo[:,0],x_fit[:,0]),"k-",label="Unsmoothed")
plt.plot(x_fit, y_fit[:,0]/np.trapz(y_fit[:,0],x_fit[:,0]),"r-",label="Smoothed")
plt.xlim(830,1150)
plt.legend()
plt.ylabel("Intensity, arbitrary unit")
plt.xlabel(r"Raman shift, cm$^{-1}$")
plt.tight_layout()
In [6]:
# Fig 3
# Intensity method: calibrate Fe3+/FeTOT against the normalised intensity
# around 930 cm-1 (averaged between 929 and 931 cm-1 by iron.intensity).
I930_std = iron.intensity(x_fit,y_fit,idx1=929.0, idx2=931.0)
print("")
print("Correlation coefficient between I930 and glass Fe3+/FeTOT is equal to %f" % np.corrcoef(I930_std,fe3_standards)[0,1])
# forward fit Fe3+/FeTOT = K1 + K2*I930, used to predict redox from intensity
popt_I, pcov_I = op.curve_fit(rp.linear,I930_std,fe3_standards)# least square fit using the rp.linear function
params_I = uncertainties.correlated_values(popt_I, pcov_I)# package fitted parameters with their uncertainties
print("")
print("The parameters of the calibration Fe3+/FeTOT = K1 + K2*I930 are:")
print(params_I)
calibration_I = rp.linear(I930_std,*params_I)
# values for the reverse fit: line on plot
popt_lf, pcov_lf = op.curve_fit(rp.linear,fe3_standards,I930_std)
params_lf = uncertainties.correlated_values(popt_lf, pcov_lf)
linear_fit = rp.linear(fe3_standards,*params_lf)
plt.figure(figsize=(3.22,3.22),dpi=300)
# per-sample mean and sample std (ddof=1) over the 5 replicate spectra
intensity_results = np.zeros((int(nb_exp/5),3)) # intensity and ese values in columns 0 and 1, mean_fe in 3
counter = 0
for i in range(0,nb_exp,5):
    intensity_results[counter,0] = np.mean(I930_std[i:i+5])
    intensity_results[counter,1] = np.std(I930_std[i:i+5],ddof=1)
    intensity_results[counter,2] = np.mean(un.nominal_values(calibration_I)[i:i+5])
    counter = counter+1
plt.errorbar(mean_fe3,intensity_results[:,0],yerr=intensity_results[:,1],fmt=".",mfc="none",mec="black",color="black")
plt.plot(fe3_standards,un.nominal_values(linear_fit),"-",color="k")
plt.xlabel(r"M\"ossbauer Fe$^{3+}$/Fe\textsuperscript{TOT}")
plt.ylabel(r"I$_{930}$, normalised")
plt.xlim(-0.03,1.03)
plt.tight_layout()
plt.savefig("./figures/Figure3.pdf")
# RMSE of the intensity calibration against the Mossbauer values
rmse_int = np.sqrt(mean_squared_error(un.nominal_values(calibration_I), fe3_standards))
print("")
print("The intensity ratio method has a rmse of %s" % str(round(rmse_int,2)))
In [7]:
# On the entire dataset, we take the mean first and last spectra (Fe3+/FeTOT = 1 and 0, respectively) as references
# Mixing method: each spectrum is modelled as Fox*oxy_ref + (1-Fox)*red_ref,
# with Fox obtained by rp.mixing_sp.
oxy_ref = np.transpose(mean_y_fit[:,0])
red_ref = np.transpose(mean_y_fit[:,-1])
Fox = rp.mixing_sp(y_fit,oxy_ref,red_ref)
print("")
print("Correlation coefficient between Fox and glass Fe3+/FeTOT is equal to %f" % np.corrcoef(Fox,fe3_standards)[0,1])
print("")
# Calculating the average value for the 5 datapoints per sample
counter = 0
Fox_results = np.zeros((int(nb_exp/5),2))
for i in range(0,nb_exp,5):
    Fox_results[counter,0] = np.mean(Fox[i:i+5])
    Fox_results[counter,1] = np.std(Fox[i:i+5],ddof=1)
    counter = counter+1
#
# Linear regression
#
popt_M, pcov_M = op.curve_fit(rp.linear,Fox,fe3_standards,sigma = np.ones(len(Fox))*0.01)# least square fit using the rp.linear function
params_M = uncertainties.correlated_values(popt_M, pcov_M)# package fitted parameters with their uncertainties
print("")
print("The linear regression Fe3+/FeTOT = K1 + K2*FOX have parameters:")
print(params_M)
calibration_M = rp.linear(Fox,*params_M)
# values for the reverse fit: line on plot
popt_lfm, pcov_lfm = op.curve_fit(rp.linear,fe3_standards,Fox)
params_lfm = uncertainties.correlated_values(popt_lfm, pcov_lfm)
linear_fit_M = rp.linear(fe3_standards,*params_lfm)
#
# Making figure 4
#
plt.close('all')
fig = plt.figure(figsize=(3.22,6.44),dpi=300)
# upper grid: spectrum + mixing fit (ax1) over residuals (ax2);
# lower grid: Fox vs Mossbauer calibration (ax3)
gs1 = gridspec.GridSpec(5, 1)
ax1 = fig.add_subplot(gs1[0:-1])
ax2 = fig.add_subplot(gs1[-1])
gs2 = gridspec.GridSpec(1, 1)
ax3 = fig.add_subplot(gs2[:,:])
# ticks
ax1.set_xticks([])
ax2.set_xticks([850,900,950,1000,1050,1100,1150])
ax2.set_yticks([-0.02,0,0.02])
# limits
ax1.set_xlim(830,1150)
ax2.set_xlim(830,1150)
ax2.set_ylim(-0.03,0.03)
ax3.set_xlim(-0.03,1.03)
ax3.set_ylim(-0.03,1.03)
# label names
ax1.set_ylabel("Intensity, normalised")
ax2.set_xlabel(r"Raman shift, cm$^{-1}$")
ax3.set_xlabel(r"M\"ossbauer Fe$^{3+}$/Fe\textsuperscript{TOT}")
ax3.set_ylabel(r"Mixing fraction $F\textsubscript{OX}$")
#
# plotting
#
# sample 4 is used as the worked example
l1, = ax1.plot(x_fit,mean_y_fit[:,4],"k-",linewidth=2.0,label=r"spectrum")
l2, = ax1.plot(x_fit,Fox_results[4,0]*oxy_ref+(1-Fox_results[4,0])*red_ref,"r--",label="fit")
ax1.plot(x_fit,Fox_results[4,0]*oxy_ref,"--",color='orange')
ax1.plot(x_fit,(1-Fox_results[4,0])*red_ref,"-.",color="purple",label=r"D\textsubscript{Fe$^{3+}$/Fe\textsuperscript{TOT} = 0")
ax2.plot(x_fit,Fox_results[4,0]*oxy_ref+(1-Fox_results[4,0])*red_ref-mean_y_fit[:,4] - 0.0,"r-",label="residuals")
ax2.plot([830,1300],[0,0],"k--")
ax3.errorbar(mean_fe3, Fox_results[:,0],yerr=Fox_results[:,1],fmt=".",mfc="none",mec="black",color="black")
ax3.plot(fe3_standards,un.nominal_values(linear_fit_M),"-",color="k")
# legend
ax1.legend(handles=[l1,l2],loc='best')
# annotations
ax1.annotate("A)",xy=(0.07,0.93),xycoords='axes fraction',ha="center",va="center")
ax2.annotate("Residuals",xy=(0.05,0.7),xycoords='axes fraction')
ax3.annotate("B)",xy=(0.07,0.93),xycoords='axes fraction',ha="center",va="center")
ax1.annotate(r"$S\textsubscript{OX}$",xy=(970,0.32),color="orange")
ax1.annotate(r"$S\textsubscript{RED}$",xy=(990,0.62),color="purple")
# Final adjustment of the grid for nice output
gs1.tight_layout(fig,h_pad=0.1,rect=[0.,0.5,1,1])
gs2.tight_layout(fig,rect=[0.,0,1,0.5])
gs1.update(bottom=0.55)
gs2.update(top=0.45)
left = min(gs1.left, gs2.left)
right = max(gs1.right, gs2.right)
gs1.update(left=left, right=right)
gs2.update(left=left, right=right)
fig.savefig("./figures/Figure4.pdf") # saving
rmse_mixing_std = np.sqrt(mean_squared_error(Fox, fe3_standards))
print("root-mean-square error between Fox as a proxy of Fe3+/Fetot and measured Fe3/Fetot = %s" % str(round(rmse_mixing_std,2)))
Alternating Least Squares Multivariate Curve Resolution
The ALS - MCR analysis was performed using the PyMCR package in Python.
This allowed us to estimate endmember spectra, which can then be mixed with the mixing_sp() function to match new observations.
Tests of the effect of the dataset size and Fe$^{3+}$/Fe$^{TOT}$ span are performed, as well as of how the choice of starting components can affect results. See article for details.
In [8]:
# test of the dataset Fe3+/FeTOT span and mean value in a loop
# For every ordered pair (i, j) of samples, build the sub-dataset D_ of
# spectra whose Fe3+/FeTOT lies between mean_fe3[j] and mean_fe3[i], run
# ALS-MCR on it starting from its first/last spectra, and record the RMSE
# of the resulting mixing fraction against all standards.
mcr_err_D_ = np.zeros((len(mean_fe3),len(mean_fe3)))
dD = np.zeros((len(mean_fe3),len(mean_fe3))) #delta
mD = np.zeros((len(mean_fe3),len(mean_fe3))) #mean
for i in range(0,len(mean_fe3)):
    for j in range (0,len(mean_fe3)):
        if (i < j):
            # NOTE(review): the mask assumes mean_fe3 is sorted in decreasing
            # order, so mean_fe3[i] >= mean_fe3[j] when i < j — confirm.
            D_ = y_fit[:,(fe3_standards<=mean_fe3[i])&(fe3_standards>= mean_fe3[j])]
            dD[i,j] = np.abs(mean_fe3[i]-mean_fe3[j])
            mD[i,j] = np.mean(fe3_standards[(fe3_standards<=mean_fe3[i])&(fe3_standards>= mean_fe3[j])])
            mcrals_ = McrAls(max_iter=50,c_constraints=[ConstraintNonneg(), ConstraintNorm()])
            mcrals_.fit(D_.T,ST=D_[:,(0,-1)].T,verbose=False)
            conc_als_ = rp.mixing_sp(y_fit,mcrals_.ST_opt_[0,:],mcrals_.ST_opt_[1,:])
            mcr_err_D_[i,j]=np.sqrt(mean_squared_error(conc_als_, fe3_standards))
In [9]:
# test of the effect of the starting S components on final rmse in a loop
# For every pair of sample mean spectra used as the starting S matrix, run
# ALS-MCR on the full dataset and record the RMSE of the estimated oxidised
# fraction against the Mossbauer values.
mcr_err_S_ = np.zeros((len(mean_fe3),len(mean_fe3)))
dS = np.zeros((len(mean_fe3),len(mean_fe3))) #delta
mS = np.zeros((len(mean_fe3),len(mean_fe3))) #mean
for i in range(0,len(mean_fe3)):
    for j in range (0,len(mean_fe3)):
        if (i < j):
            dS[i,j] = np.abs(mean_fe3[i]-mean_fe3[j])
            mS[i,j] = (mean_fe3[i]+mean_fe3[j])/2
            mcrals_ = McrAls(max_iter=50,c_constraints=[ConstraintNonneg(), ConstraintNorm()])
            mcrals_.fit(y_fit.T,ST=mean_y_fit[:,(i,j)].T,verbose=False)
            # which C_opt_ column holds the oxidised fraction depends on the
            # order of the starting spectra; it is selected using the first
            # sample (presumably the most oxidised one — confirm).
            if mcrals_.C_opt_[0,0] > mcrals_.C_opt_[0,1]:
                mcr_err_S_[i,j]=np.sqrt(mean_squared_error(mcrals_.C_opt_[:,0], fe3_standards))
            else:
                mcr_err_S_[i,j]=np.sqrt(mean_squared_error(mcrals_.C_opt_[:,1], fe3_standards))
In [10]:
# Figure 5 : results
# RMSE maps for the two ALS-MCR sensitivity tests, as filled triangular contours.
# FIX: removed "from matplotlib.mlab import griddata" — it was never used in
# this cell (tricontour/tricontourf are used instead) and matplotlib.mlab.griddata
# was removed in matplotlib 3.1, so the import crashes on modern matplotlib.
# The unused "import matplotlib.tri as tri" was dropped as well.
# Trying tricontour
plt.figure(figsize=(3.22,6),dpi=300)
plt.subplot(2,1,1)
# panel A: effect of the dataset span (dD) and mean (mD) on the RMSE
plt.tricontour(dD.flatten(), mD.flatten(), mcr_err_D_.flatten(), 20, linewidths=0.5, colors='k') # contour lines
plt.tricontourf(dD.flatten(), mD.flatten(), mcr_err_D_.flatten(), 20, # color map plot/parameters
                cmap = matplotlib.cm.plasma,
                norm=plt.Normalize(vmax=np.max(mcr_err_D_), vmin=np.min(mcr_err_D_))) #abs(Ir).min()
cbar1 = plt.colorbar() # draw colorbar
cbar1.ax.set_ylabel(r'RMSD Fe$^{3+}$/Fe\textsuperscript{TOT}')
plt.xlabel(r"$\Delta$Fe$^{3+}$/Fe\textsuperscript{TOT} of $D_{n,m}$")
plt.ylabel(r"mean Fe$^{3+}$/Fe\textsuperscript{TOT} of $D_{n,m}$")
plt.plot(dD.flatten(), mD.flatten(), 'k.', ms=3)
plt.annotate("A)",xy=(0.93,0.93),ha="center",va="center",xycoords="axes fraction")
plt.subplot(2,1,2)
# panel B: effect of the choice of starting S components
plt.tricontour(dS.flatten(), mS.flatten(), mcr_err_S_.flatten(), 20, linewidths=0.5, colors='k') # contour lines
plt.tricontourf(dS.flatten(), mS.flatten(), mcr_err_S_.flatten(), 20, # color map plot/parameters
                cmap = matplotlib.cm.plasma,
                norm=plt.Normalize(vmax=np.max(mcr_err_S_), vmin=np.min(mcr_err_S_))) #abs(Ir).min()
cbar2 = plt.colorbar() # draw colorbar
cbar2.ax.set_ylabel(r'RMSD Fe$^{3+}$/Fe\textsuperscript{TOT}')
plt.plot(dS.flatten(), mS.flatten(), 'k.', ms=3)
plt.xlabel(r"$\Delta$Fe$^{3+}$/Fe\textsuperscript{TOT} of initial $S_{k,m}$")
plt.ylabel(r"mean Fe$^{3+}$/Fe\textsuperscript{TOT} of initial $S_{k,m}$")
plt.annotate("B)",xy=(0.93,0.93),ha="center",va="center",xycoords="axes fraction")
plt.tight_layout()
plt.savefig("./figures/Figure5.pdf")
In [11]:
#
# Doing the fit with mean spectra at 0.66 (2) and 0.25 (5) as starting S values (index 0 and -1)
#
# Final ALS-MCR fit on the full standards dataset, started from the mean
# spectra of samples at indices 2 and 5; the optimised endmembers are then
# mixed (rp.mixing_sp) to obtain the oxidised fraction Cox of each spectrum.
mcrals = McrAls(max_iter=50,c_constraints=[ConstraintNonneg(), ConstraintNorm()])
mcrals.fit(y_fit.T,ST=mean_y_fit[:,(2,5)].T)
print('\nFinal MSE: {:.7e}'.format(mcrals.err[-1]))
print('Optimal Iteration: {:.7e}'.format(mcrals.n_iter_opt))
# we mix the optimised extrema
conc_als_mcr = rp.mixing_sp(y_fit,mcrals.ST_opt_[0,:],mcrals.ST_opt_[1,:])
#
# Linear regression
#
popt_MCR, pcov_MCR = op.curve_fit(rp.linear,conc_als_mcr,fe3_standards)# least square fit using the rp.linear function
params_MCR = uncertainties.correlated_values(popt_MCR, pcov_MCR)# package fitted parameters with their uncertainties
print("")
print("The linear regression Fe3+/FeTOT = K1 + K2*Cox for ALS MCR have parameters:")
print(params_MCR)
calibration_MCR = rp.linear(conc_als_mcr,*params_MCR) # good MCR fractions
# for the plot: reverse fit
popt_lfmcr, pcov_lfmcr = op.curve_fit(rp.linear,fe3_standards,conc_als_mcr)
params_lfmcr = uncertainties.correlated_values(popt_lfmcr, pcov_lfmcr)
linear_fit_MCR = rp.linear(fe3_standards,*params_lfmcr)
# NOTE: despite the mse_ prefix this is an RMSE (sqrt applied)
mse_als = np.sqrt(mean_squared_error(un.nominal_values(calibration_MCR), fe3_standards))
print('RMSE with using starting spectra at 0.66 and 0.25 is %.2f' % mse_als)
print('If we assume Cox directly equals Fe3+/FeTOT, rmse is %.2f' % np.sqrt(mean_squared_error(conc_als_mcr, fe3_standards)))
#
# calculating mean values for errorbar plot
#
conc_als_mcr_mean = np.zeros((len(mean_fe3),3)) # mean value, std, and associated mean Fe3
counter = 0
for i in range(0,nb_exp,5):
    conc_als_mcr_mean[counter,0]=np.mean(conc_als_mcr[i:i+5])
    conc_als_mcr_mean[counter,1]=np.std(conc_als_mcr[i:i+5],ddof=1)
    conc_als_mcr_mean[counter,2]=np.mean(un.nominal_values(calibration_MCR)[i:i+5])
    counter += 1
#
# FIGURE 6
#
fig6 = plt.figure(figsize=(3.22,9.66),dpi=300)
#
# Preparing layout
#
# three stacked grids: endmembers (ax1/ax5), example fit (ax2/ax3),
# calibration (ax4)
plt_height = 0.24
gs1m = GridSpec(5, 1)
ax1_mcr = fig6.add_subplot(gs1m[0:-1, 0])
ax5_mcr = fig6.add_subplot(gs1m[-1, 0])
gs2m = GridSpec(5, 1)
ax2_mcr = fig6.add_subplot(gs2m[0:-1, 0])
ax3_mcr = fig6.add_subplot(gs2m[-1, 0])
gs3m = GridSpec(1, 1)
ax4_mcr = fig6.add_subplot(gs3m[0, 0])
#
# PLOTS
#
# those were the starting spectra
ax1_mcr.plot(x_fit,mean_y_fit[:,2],"-",color='orange',alpha=0.4,label=r'init. $S$\textsubscript{OX}')
ax1_mcr.plot(x_fit,mean_y_fit[:,5],"-",color='purple',alpha=0.4,label=r'init. $S$\textsubscript{RED}')
# those are the optimised spectra
ax1_mcr.plot(x_fit,mcrals.ST_opt_[0,:],"--",color='orange',label=r'opt. $S$\textsubscript{OX}')
ax1_mcr.plot(x_fit,mcrals.ST_opt_[1,:],"-.",color='purple',label=r'opt. $S$\textsubscript{RED}')
# the fits (sample 4 as example)
ax2_mcr.plot(x_fit,mean_y_fit[:,4],"k-",linewidth=2.0,label=r"spectrum")
ax2_mcr.plot(x_fit,conc_als_mcr_mean[4,0]*mcrals.ST_opt_[0,:]+(1-conc_als_mcr_mean[4,0])*mcrals.ST_opt_[1,:],"r--",label="fit")
ax2_mcr.plot(x_fit,conc_als_mcr_mean[4,0]*mcrals.ST_opt_[0,:],"--",color="orange")
ax2_mcr.plot(x_fit,(1-conc_als_mcr_mean[4,0])*mcrals.ST_opt_[1,:],"-.",color="purple")
ax3_mcr.plot(x_fit,mean_y_fit[:,4]-(conc_als_mcr_mean[4,0]*mcrals.ST_opt_[0,:]+(1-conc_als_mcr_mean[4,0])*mcrals.ST_opt_[1,:]),"r-",label="fit")
ax3_mcr.plot([830,1300],[0,0],"k--")
# the iron comparison
ax4_mcr.plot(fe3_standards,un.nominal_values(linear_fit_MCR),"-",color="k")
ax4_mcr.errorbar(mean_fe3,conc_als_mcr_mean[:,0],yerr=conc_als_mcr_mean[:,1],fmt='.',mfc="none",mec="black",color="black")
# comparing the optimised spectra and those at Fe3+/FeTOT = 0 and 1
ax5_mcr.plot(x_fit,mcrals.ST_opt_[0,:]-mean_y_fit[:,0],"--",color='orange',label=r'opt. $S$\textsubscript{OX} - D\textsubscript{Fe$^{3+}$/Fe\textsuperscript{TOT} = 1}')
ax5_mcr.plot(x_fit,mcrals.ST_opt_[1,:]-mean_y_fit[:,-1],"-.",color='purple',label=r'opt. $S$\textsubscript{RED} - D\textsubscript{Fe$^{3+}$/Fe\textsuperscript{TOT} = 0}')
ax5_mcr.plot([830,1300],[0,0],"k--")
# Limits
ax1_mcr.set_xlim(830,1150)
ax2_mcr.set_xlim(830,1150)
ax3_mcr.set_xlim(830,1150)
ax4_mcr.set_xlim(-0.03,1.03)
ax5_mcr.set_xlim(830,1150)
ax1_mcr.set_ylim(-0.03,1.03)
ax2_mcr.set_ylim(-0.03,1.03)
ax3_mcr.set_ylim(-0.03,0.03)
ax4_mcr.set_ylim(-0.03,1.03)
ax5_mcr.set_ylim(-0.03,0.03)
# labels
ax3_mcr.set_xlabel(r"Raman shift, cm$^{-1}$")
ax4_mcr.set_xlabel(r"M\"ossbauer Fe$^{3+}$/Fe\textsuperscript{TOT}")
ax5_mcr.set_xlabel(r"Raman shift, cm$^{-1}$")
ax1_mcr.set_ylabel("Intensity, normalised")
ax2_mcr.set_ylabel("Intensity, normalised")
ax4_mcr.set_ylabel(r"$C\textsubscript{OX}$")
# annotations
ax1_mcr.annotate("A)",xy=(0.07,0.93),ha="center",va="center",xycoords="axes fraction")
ax2_mcr.annotate("B)",xy=(0.07,0.93),ha="center",va="center",xycoords="axes fraction")
ax2_mcr.annotate(r"$S\textsubscript{OX}$",xy=(970,0.32),color="orange")
ax2_mcr.annotate(r"$S\textsubscript{RED}$",xy=(990,0.62),color="purple")
ax3_mcr.annotate("Res.",xy=(0.05,0.7),xycoords="axes fraction")
ax4_mcr.annotate("C)",xy=(0.07,0.93),ha="center",va="center",xycoords="axes fraction")
ax5_mcr.annotate("Res.",xy=(0.05,0.7),xycoords="axes fraction")
# legend
ax1_mcr.legend(loc="best")
ax2_mcr.legend(loc="best")
# ticks
ax1_mcr.set_xticks([])
ax2_mcr.set_xticks([])
ax3_mcr.set_yticks([-0.02,0,0.02])
ax5_mcr.set_yticks([-0.02,0,0.02])
ax5_mcr.set_xticks([850,900,950,1000,1050,1100,1150])
ax3_mcr.set_xticks([850,900,950,1000,1050,1100,1150])
# Final adjustment of the grid for nice output
gs1m.tight_layout(fig6,w_pad=0.,h_pad=0.1,rect=[0,0.66,1,1.])
gs2m.tight_layout(fig6,w_pad=0.,h_pad=0.1,rect=[0,0.33,1,.66])
gs3m.tight_layout(fig6,w_pad=0.,rect=[0,0.,1,.33])
gs1m.update(bottom=0.70)
gs2m.update(bottom=0.36,top=0.64)
gs3m.update(top=0.30)
left = min(gs1m.left, gs2m.left, gs3m.left)
right = max(gs1m.right, gs2m.right, gs3m.right)
gs1m.update(left=left, right=right)
gs2m.update(left=left, right=right)
gs3m.update(left=left, right=right)
fig6.savefig("./figures/Figure6.pdf")
In [12]:
#
# TRAIN-TEST SPLIT
#
# Split the standards by their logfo2 groups (rp.chemical_splitting, 35 %
# test fraction) so all replicates of a sample fall in the same subset.
train_sub, test_sub, idxtrain_sub, idxtest_sub = rp.chemical_splitting(liste_standards,'logfo2',0.35)
y_train = fe3_standards.iloc[idxtrain_sub[0]]
X_train = y_fit[:,idxtrain_sub[0]]
y_test = fe3_standards.iloc[idxtest_sub[0]]
X_test = y_fit[:,idxtest_sub[0]]
# we need to put X_test and X_train with rows = samples and columns = frequencies
X_train = np.transpose(X_train)
X_test = np.transpose(X_test)
print("Testing data subset represents %.1f percents of the dataset." % (float(len(y_test))/float(len(fe3_standards))*100))
#
# TRAINING
#
#
# Gridsearch of hyperparameters for kernel ridge and support vector regression
#
prm_kr = dict(alpha=[1e2, 1e3, 1e1, 1e0, 0.5, 0.1, 5e-2, 1e-2, 5e-3, 1e-3],gamma=np.logspace(-6, 6, 13))
prm_svr = dict(C=[1e0, 2e0, 5e0, 1e1, 5e1, 1e2, 5e2, 1e3, 5e3, 1e4, 5e4, 1e5],gamma=np.logspace(-6, 6, 13))
#
# parameters for neural net
#
# 3 relu activation units, solver lbfgs, early stopping on
prm_nn = dict(hidden_layer_sizes=(3,),solver = 'lbfgs',activation='relu',early_stopping=True)
# bagging 100 networks, with bootstrap on, random state 42 and on 1 core.
prm_bag = dict(n_estimators=100, max_samples=1.0, max_features=1.0, bootstrap=True, bootstrap_features=False, oob_score=False, warm_start=False, n_jobs=1, random_state=42, verbose=0)
# model objects (rampy wrappers around scikit-learn regressors,
# with StandardScaler applied to the inputs)
nn_mod = rp.mlregressor(X_train,y_train.values,X_test=X_test, y_test = y_test.values,algorithm="BaggingNeuralNet",scaler="StandardScaler")
svr_mod = rp.mlregressor(X_train,y_train.values,X_test=X_test, y_test = y_test.values,algorithm="SVM",scaler="StandardScaler")
kr_mod = rp.mlregressor(X_train,y_train.values,X_test=X_test, y_test = y_test.values,algorithm="KernelRidge",scaler="StandardScaler")
svr_mod.param_svm = prm_svr
kr_mod.param_kr = prm_kr
nn_mod.param_neurons = prm_nn
nn_mod.param_bag = prm_bag
nn_mod.fit()
kr_mod.fit()
svr_mod.fit()
In [13]:
#
# Calculating mean values for each sample, for the figure 6
#
# NOTE(review): the comment above says "figure 6" but these per-sample means
# are plotted in Figure 7 below — confirm.
y_train_mean = np.zeros((int(len(y_train)/5)))
y_test_mean = np.zeros((int(len(y_test)/5)))
kr_train_mean = np.zeros((int(len(y_train)/5),2))
kr_test_mean = np.zeros((int(len(y_test)/5),2))
svr_train_mean = np.zeros((int(len(y_train)/5),2))
svr_test_mean = np.zeros((int(len(y_test)/5),2))
nn_train_mean = np.zeros((int(len(y_train)/5),2))
nn_test_mean = np.zeros((int(len(y_test)/5),2))
# for the training subset: mean (col 0) and sample std (col 1) of the
# predictions over the 5 replicates of each sample
counter= 0
for i in range(0,len(y_train),5):
    y_train_mean[counter] = np.mean(y_train[i:i+5])
    kr_train_mean[counter,0] = np.mean(kr_mod.prediction_train[i:i+5])
    svr_train_mean[counter,0] = np.mean(svr_mod.prediction_train[i:i+5])
    nn_train_mean[counter,0] = np.mean(nn_mod.prediction_train[i:i+5])
    kr_train_mean[counter,1] = np.std(kr_mod.prediction_train[i:i+5],ddof=1)
    svr_train_mean[counter,1] = np.std(svr_mod.prediction_train[i:i+5],ddof=1)
    nn_train_mean[counter,1] = np.std(nn_mod.prediction_train[i:i+5],ddof=1)
    counter += 1
# for testing set subset
counter = 0
for i in range(0,len(y_test),5):
    y_test_mean[counter] = np.mean(y_test[i:i+5])
    kr_test_mean[counter,0] = np.mean(kr_mod.prediction_test[i:i+5])
    svr_test_mean[counter,0] = np.mean(svr_mod.prediction_test[i:i+5])
    nn_test_mean[counter,0] = np.mean(nn_mod.prediction_test[i:i+5])
    kr_test_mean[counter,1] = np.std(kr_mod.prediction_test[i:i+5],ddof=1)
    svr_test_mean[counter,1] = np.std(svr_mod.prediction_test[i:i+5],ddof=1)
    nn_test_mean[counter,1] = np.std(nn_mod.prediction_test[i:i+5],ddof=1)
    counter += 1
#
# Measuring errors on training and testing datasets
#
# NOTE: despite the MSE_ prefix these are RMSE values (sqrt applied).
MSE_SVR_train = np.sqrt(mean_squared_error(y_train, svr_mod.prediction_train))
MSE_KR_train = np.sqrt(mean_squared_error(y_train, kr_mod.prediction_train))
MSE_NN_train = np.sqrt(mean_squared_error(y_train, nn_mod.prediction_train))
MSE_SVR_test = np.sqrt(mean_squared_error(y_test, svr_mod.prediction_test))
MSE_KR_test = np.sqrt(mean_squared_error(y_test, kr_mod.prediction_test))
MSE_NN_test = np.sqrt(mean_squared_error(y_test, nn_mod.prediction_test))
#
# returning values for the entire dataset
# we use the Yscaler, Xscaler and model_xx objects to predict new values
# those objects can be saved on the hard drive for future estimations using joblib (see sklearn docs)
#
nn_all = nn_mod.predict(y_fit.T)
kr_all = kr_mod.predict(y_fit.T)
svr_all = svr_mod.predict(y_fit.T)
# calculating the mean for all samples
kr_all_mean = np.zeros(len(mean_fe3))
svr_all_mean = np.zeros(len(mean_fe3))
nn_all_mean = np.zeros(len(mean_fe3))
counter = 0
for i in range(0,len(fe3_standards),5):
    kr_all_mean[counter] = np.mean(kr_all[i:i+5])
    svr_all_mean[counter] = np.mean(svr_all[i:i+5])
    nn_all_mean[counter] = np.mean(nn_all[i:i+5])
    counter += 1
#
# Printing
#
print("Training errors are:")
print("Support Vector: %.2f" % MSE_SVR_train)
print("Kernel Ridge: %.2f" % MSE_KR_train)
print("Neural Net: %.2f" % MSE_NN_train)
print("")
print("Testing errors are:")
print("Support Vector: %.2f" % MSE_SVR_test)
print("Kernel Ridge: %.2f" % MSE_KR_test)
print("Neural Net: %.2f" % MSE_NN_test)
In [14]:
# Figure 7: predicted versus measured Fe3+/FeTOT for the three supervised
# methods (A: bagged neural nets, B: kernel ridge, C: support vector).
fig7 = plt.figure(figsize=(3.22,9),dpi=300)
# Neural Networks
plt.subplot(3,1,1)
plt.annotate("A)",xy=[0.07,0.93],ha="center",va="center",fontsize=12)
# error bars span 2 standard deviations of the 5 replicate predictions
plt.errorbar(y_train_mean, nn_train_mean[:,0],yerr=2*nn_train_mean[:,1],fmt=".",mfc="none",mec="black",color="black",label="Training")
plt.errorbar(y_test_mean, nn_test_mean[:,0],yerr=2*nn_test_mean[:,1],fmt="*",mfc="black",mec="black",color="black",label="Testing")
plt.plot([0,1],[0,1],"k--")
plt.xlim(-0.05,1.05); plt.ylim(-0.05,1.05)
plt.legend(loc=4)
plt.subplot(3,1,2)
# Kernel Ridge
plt.annotate("B)",xy=[0.07,0.93],ha="center",va="center",fontsize=12)
plt.errorbar(y_train_mean, kr_train_mean[:,0],yerr=2*kr_train_mean[:,1],fmt=".",mfc="none",mec="black",color="black",label="Training")
plt.errorbar(y_test_mean, kr_test_mean[:,0],yerr=2*kr_test_mean[:,1],fmt="*",mfc="black",mec="black",color="black",label="Testing")
plt.plot([0,1],[0,1],"k--")
plt.xlim(-0.05,1.05); plt.ylim(-0.05,1.05)
plt.ylabel(r"Raman Fe$^{3+}$/Fe\textsuperscript{TOT}")
# Support Vector
plt.subplot(3,1,3)
plt.annotate("C)",xy=[0.07,0.93],ha="center",va="center",fontsize=12)
plt.errorbar(y_train_mean, svr_train_mean[:,0],yerr=2*svr_train_mean[:,1],fmt=".",mfc="none",mec="black",color="black",label="Training")
plt.errorbar(y_test_mean, svr_test_mean[:,0],yerr=2*svr_test_mean[:,1],fmt="*",mfc="black",mec="black",color="black",label="Testing")
plt.plot([0,1],[0,1],"k--")
plt.xlim(-0.05,1.05); plt.ylim(-0.05,1.05)
plt.xlabel(r"M\"ossbauer Fe$^{3+}$/Fe\textsuperscript{TOT}")
fig7.tight_layout()
fig7.savefig("./figures/Figure7.pdf")
In [15]:
# Summary of the errors of every method.
# MSE_tot (actually an RMSE): error of the per-spectrum average of the six
# methods' predictions against the Mossbauer values.
MSE_tot = np.sqrt(mean_squared_error(np.mean(np.vstack((nn_all.T,kr_all.T,svr_all.T,un.nominal_values(calibration_MCR),un.nominal_values(calibration_I),Fox)).T,1),fe3_standards))
print("SUMMARY OF THE ERRORS")
print("")
print("RMSE for ALS MCR, Intensity ratio and Mixing methods:")
# FIX: the original used round(x,2) together with "%f", which still prints
# six decimals (e.g. 0.120000); "%.2f" yields the intended two decimals.
print("Intensity method: %.2f"%rmse_int)
print("Mixing method = %.2f"%rmse_mixing_std)
print("ALS MCR method: %.2f"%mse_als)
print("")
print("RMSE for supervised machine learning methods:")
print("")
print("TRAINING SUBSET")
print("NN method: %.2f"%MSE_NN_train)
print("KR method: %.2f"%MSE_KR_train)
print("SVR method: %.2f"%MSE_SVR_train)
print("")
print("TESTING SUBSET")
print("NN method: %.2f"%MSE_NN_test)
print("KR method: %.2f"%MSE_KR_test)
print("SVR method: %.2f"%MSE_SVR_test)
print("")
print("")
print("Global RMSE is %.2f"%MSE_tot)
In [16]:
# loading dataset and pre-treatment
# Validation on spectra acquired with a second spectrometer (T64000):
# all six calibrations built above are applied and averaged per spectrum.
liste_T64000 = pd.read_excel("./MORB_datalist.xlsx", sheet_name="T64000")
# preparing the data for the standards
x_t64, t64_record, bas2_t64, x_fit_t64, y_t64_nosmo, y_t64, nb_exp_t64 = iron.preparing_data(liste_T64000,
                                                                                             cutoff=np.array([850.,1140.]))
#
# Intensity Method
#
I930_t64 = iron.intensity(x_fit_t64,y_t64, idx1=929.,idx2=931.)
fe3_t64_900 = un.nominal_values(rp.linear(I930_t64,*params_I))
#
# Mixing with extremum spectra
#
fe3_t64_mix = rp.mixing_sp(y_t64,oxy_ref,red_ref)
#
# Mixing with ALS MCR spectra
#
Cox_t64_als = rp.mixing_sp(y_t64,mcrals.ST_opt_[0,:],mcrals.ST_opt_[1,:])
fe3_t64_als = un.nominal_values(rp.linear(Cox_t64_als,*params_MCR))
#
# Supervised Machine learning techniques
#
# predicting
fe3_t64_svr = svr_mod.predict(y_t64.T)
fe3_t64_kr = kr_mod.predict(y_t64.T)
fe3_t64_nn = nn_mod.predict(y_t64.T)
#
# Average values
#
# per-spectrum mean and sample std across the six methods
fe3_t64_mean = np.mean(np.concatenate((fe3_t64_900.reshape(-1,1),fe3_t64_mix.reshape(-1,1),fe3_t64_als.reshape(-1,1),
                                       fe3_t64_svr.reshape(-1,1),fe3_t64_kr.reshape(-1,1),fe3_t64_nn.reshape(-1,1)),axis=1),axis=1)
fe3_t64_std = np.std(np.concatenate((fe3_t64_900.reshape(-1,1),fe3_t64_mix.reshape(-1,1),fe3_t64_als.reshape(-1,1),
                                     fe3_t64_svr.reshape(-1,1),fe3_t64_kr.reshape(-1,1),fe3_t64_nn.reshape(-1,1)),axis=1),axis=1,ddof=1)
#
# Figure T64000
#
figT64 = plt.figure(figsize=(6.73,3.24),dpi=300)
plt.subplot(1,2,1)
plt.plot(x_fit_t64,y_t64_nosmo[:,0],"k-")#,color=s_m.to_rgba(liste_T64000.fe3_moss[0]))
plt.xlabel(r"Raman shift, cm$^{-1}$")
plt.ylabel("Intensity, normalised")
plt.annotate("A)",xy=[0.07,0.93],xycoords='axes fraction',ha="center",va="center",fontsize=12)
plt.subplot(1,2,2)
plt.errorbar(liste_T64000.fe3_moss,fe3_t64_mean,yerr=fe3_t64_std,fmt=".",mfc="none",mec="black",color="black")
plt.plot([0,1],[0,1],"k--")
plt.annotate("B)",xy=[0.07,0.93],xycoords='axes fraction',ha="center",va="center",fontsize=12)
plt.xlabel(r"M\"ossbauer Fe$^{3+}$/Fe\textsuperscript{TOT}")
plt.ylabel(r"Raman Fe$^{3+}$/Fe\textsuperscript{TOT}")
plt.xlim(-0.03,1.03)
plt.tight_layout()
#
# Noise calculation
#
# noise = RMS difference between smoothed and unsmoothed spectra; the
# reciprocal of its mean is then used as a signal/noise estimate
# (assumes the spectra are normalised so the signal is ~1 — confirm).
ese_noise_t64 = np.sqrt(np.sum((y_t64-y_t64_nosmo)**2/y_t64.shape[0],axis=0))
ese_fe_t64 = np.sqrt(mean_squared_error(liste_T64000.fe3_moss,fe3_t64_mean))
ese_noise_standards = np.sqrt(np.sum((y_fit-y_fit_nosmo)**2/y_fit.shape[0],axis=0))
SN_t64 = 1.0/np.mean(ese_noise_t64)
SN_renishaw = 1.0/np.mean(ese_noise_standards)
print("Mean signal/noise of spectra acquired on the T64000 : %.2f" % SN_t64)
print("Mean signal/noise of spectra acquired on the Renishaw : %.2f" % SN_renishaw)
print("Mean error on Fe3+/FeTOT with the T64000 : %.2f" % np.mean(ese_fe_t64))
print("Mean error on Fe3+/FeTOT with the Renishaw : %.2f" % np.mean(MSE_tot))
print("\nIn the figure: \nA) is an example of a spectrum acquired with the T64000; \nB) is a plot of predicted (average of the six methods) versus measured Fe3+/FeTOT")
In [17]:
# loading dataset and pre-treatment
# Application to natural MORB glasses: load, inspect for crystal (CTX)
# contamination, filter, and re-process the retained samples.
liste_standards = pd.read_excel("./MORB_datalist.xlsx", sheet_name="standards") # we reload the standard list for safety
liste_ori = pd.read_excel("./MORB_datalist.xlsx", sheet_name="natural")
# preparing the data
x_nat, natural_record, bas2_nat, x_fit_nat, y_natural_nosmo, y_natural, nb_exp_nat = iron.preparing_data(liste_ori,
                                                                                                         cutoff=np.array([850.,1140.]))
#
# Supplementary Figure
#
# stacked plot of the mean raw spectrum of each natural sample, used to
# visually spot contaminated spectra before filtering
plt.figure(figsize=(10,10),dpi=300)
shift = 0
for i in range(0,natural_record.shape[1],5):
    plt.plot(x_nat[x_nat<1270],np.mean(natural_record[x_nat<1270,i:i+5],axis=1)+shift)
    plt.annotate(liste_ori.loc[i,"sample"],xy=(1280.,shift),va="center")
    shift += 0.3
plt.xlabel(r"Raman shift, cm$^{-1}$"); plt.ylabel("Intensity, normalised")
plt.xlim(300,1400)
plt.show()
# exclude the following samples based on previous figure (too much CTX)
liste_natural = liste_ori[liste_ori["sample"] != "VG290"] # Ctx
liste_natural = liste_natural[liste_natural["sample"] != "VG3450"] # weird peak 800
liste_natural = liste_natural[liste_natural["sample"] != "VG175"] # ctx
liste_natural = liste_natural[liste_natural["sample"] != "VG183"] # ctx
# shape should be smaller if success
print(liste_natural.shape)
#
# Re-preparing the data for redox analysis
#
x_nat, natural_record, bas2_nat, x_fit_nat, y_natural_nosmo, y_natural, nb_exp_nat = iron.preparing_data(liste_natural,
                                                                                                         cutoff=np.array([850.,1140.]))
# number of natural samples
print("We kept %i natural samples." % int(nb_exp_nat/5))
liste_natural.index = range(nb_exp_nat)
#
# mean spectra per sample
#
mean_y_nat = np.ones((y_natural.shape[0],int(nb_exp_nat/5)))
counter = 0
for i in range(0,nb_exp_nat,5):
    mean_y_nat[:,counter]=np.mean(y_natural[:,i:i+5],1)
    counter = counter+1
#
# intensity at 930
#
I930_nat = iron.intensity(x_fit_nat,y_natural, idx1=929.,idx2=931.)
I930_nat_mean = iron.intensity(x_fit_nat,mean_y_nat, idx1=929.,idx2=931.)
#
# figure 8
#
plt.figure(figsize=(6,3.22),dpi=500)
# panel A: every natural spectrum in faint black, overlaid with the mean
# standard spectrum whose Fe3+/FeTOT is 0.103 (boolean mask on mean_fe3
# selects that column) as a red dash-dot reference
plt.subplot(1,2,1)
plt.plot(x_fit_nat,
y_natural[:,:],
color="k",
alpha=0.05)
plt.plot(x_fit,
mean_y_fit[:,mean_fe3==0.103],"r-.")
plt.xlabel(r"Raman shift, cm$^{-1}$"); plt.ylabel("Intensity, normalised")
plt.xticks([850,900,950,1000,1050,1100,1150])
plt.xlim(830,1150)
plt.annotate("A)",xy=[0.07,0.93],xycoords="axes fraction",ha="center",va="center",fontsize=12)
plt.tight_layout()
# panel B: I930 (mean +/- std over the 5 replicates of each sample)
# against the sample's MgO + CaO content
plt.subplot(1,2,2)
for i in range(0,nb_exp_nat,5):
plt.errorbar(liste_natural.loc[i,"mgo"]+liste_natural.loc[i,"cao"],np.mean(I930_nat[i:i+5]),yerr=np.std(I930_nat[i:i+5]),fmt=".",color="k")
plt.xlabel(r"MgO + CaO, wt\%")
plt.ylabel(r"I$_{930}$, normalised")
#plt.xlim(-0.03,1.03)
plt.annotate("B)",xy=[0.07,0.93],xycoords="axes fraction",ha="center",va="center",fontsize=12)
plt.tight_layout()
plt.savefig("./figures/Figure8.pdf")
# quantify the chemistry/intensity correlation shown in panel B
print("Correlation coefficient between X and Y in fig. 8B:")
print(np.corrcoef(liste_natural["mgo"]+liste_natural["cao"],I930_nat))
print("")
print("")
#
# Calculation of the associated Fe3+/FeTOT with previous methods
#
# Intensity Method
fe3_nat_900 = un.nominal_values(rp.linear(I930_nat,*params_I))
# Mixing with extremum spectra
fe3_nat_mix = rp.mixing_sp(y_natural,oxy_ref,red_ref)
# Mixing with ALS MCR spectra
Cox_nat_als = rp.mixing_sp(y_natural,mcrals.ST_opt_[0,:],mcrals.ST_opt_[1,:])
fe3_nat_als = un.nominal_values(rp.linear(Cox_nat_als,*params_MCR))
# Supervised Machine learning techniques
fe3_nat_svr = svr_mod.predict(y_natural.T)
fe3_nat_kr = kr_mod.predict(y_natural.T)
fe3_nat_nn = nn_mod.predict(y_natural.T)
# A dictionary recording the mean and standard deviation of each technique
df_ratios_nat = dict(
NN=np.mean(fe3_nat_nn),NN_ese=np.std(fe3_nat_nn,ddof=1),
SVR = np.mean(fe3_nat_svr), SVR_ese=np.std(fe3_nat_svr,ddof=1),
KRR = np.mean(fe3_nat_kr), KRR_ese=np.std(fe3_nat_kr,ddof=1),
I_930 = np.mean(fe3_nat_900), I_930_ese=np.std(fe3_nat_900,ddof=1),
Mixing = np.mean(fe3_nat_mix), Mixing_ese=np.std(fe3_nat_mix,ddof=1),
ALSMCR = np.mean(fe3_nat_als), ALSMCR_ese=np.std(fe3_nat_als,ddof=1))
print(df_ratios_nat)
mean_fe3_MORB = np.mean(np.vstack((fe3_nat_nn.T,fe3_nat_svr.T,fe3_nat_kr.T,fe3_nat_900,fe3_nat_mix,fe3_nat_als)))
ese_fe3_MORB = np.std(np.vstack((fe3_nat_nn.T,fe3_nat_svr.T,fe3_nat_kr.T,fe3_nat_900,fe3_nat_mix,fe3_nat_als)),ddof=1)
print("\n###########################")
print("\nFe3+/FeTOT calculated with all methods (no chemical correction):\n average and standard deviations for natural MORB glasses are %.2f and %.2f, respectively"% (mean_fe3_MORB, ese_fe3_MORB))
#
# Correction for the effect of chemistry
#
# Chemistry in mol%: sanity-check the tables, then convert wt% to mol%
chem = iron.wt_mol(iron.chimie_control(liste_standards))
chem_nat = iron.wt_mol(iron.chimie_control(liste_natural))


def _ifs(charge, radius_nm):
    """Return the ionic field strength Z/r^2 (nm^-2) of a cation.

    Radii in nm from Whittaker & Muntus (1970).
    """
    return charge/(radius_nm**2)


# Ionic field strength of each cation, in nm^-2
ifs_na = _ifs(1.0, 0.110)   # CN6, Whittaker et Muntus 1970
ifs_k = _ifs(1.0, 0.146)    # CN6, Whittaker et Muntus 1970
ifs_ca = _ifs(2.0, 0.108)   # CN6, Whittaker et Muntus 1970
ifs_mg = _ifs(2.0, 0.080)   # CN6, Whittaker et Muntus 1970
ifs_fe2 = _ifs(2.0, 0.080)  # CN6, Whittaker et Muntus 1970
ifs_si = _ifs(4.0, 0.026)   # CN4, Whittaker et Muntus 1970
ifs_ti = _ifs(4.0, 0.069)   # CN4, Whittaker et Muntus 1970
ifs_al = _ifs(3.0, 0.039)   # CN4, Whittaker et Muntus 1970
#### STANDARDS
# Molar fractions of the eight major oxides over their sum, then the ionic
# field strength of the tetrahedral (T: Si, Al, Ti) and modifier (M)
# populations; fractions are weighted by cations per formula unit (2 for
# Al2O3, Na2O, K2O).
_ox_std = ("sio2", "al2o3", "tio2", "na2o", "k2o", "mgo", "cao", "feo")
chem_tot = sum(chem[ox] for ox in _ox_std)
chem_m = sum(chem[ox] for ox in ("na2o", "k2o", "mgo", "cao", "feo"))
(sio2_std, al2o3_std, tio2_std, na2o_std,
 k2o_std, mgo_std, cao_std, feo_std) = (chem[ox]/chem_tot for ox in _ox_std)
ifst_std = sio2_std*ifs_si + 2*al2o3_std*ifs_al + tio2_std*ifs_ti
ifsm_std = 2*na2o_std*ifs_na + 2*k2o_std*ifs_k + cao_std*ifs_ca + mgo_std*ifs_mg + feo_std*ifs_fe2
#### NATURAL MORBS
# Same computation as for the standards, applied to the natural glasses:
# molar oxide fractions, then T- and M-population ionic field strengths.
_ox_nat = ("sio2", "al2o3", "tio2", "na2o", "k2o", "mgo", "cao", "feo")
chem_nat_tot = sum(chem_nat[ox] for ox in _ox_nat)
chem_nat_m = sum(chem_nat[ox] for ox in ("na2o", "k2o", "mgo", "cao", "feo"))
(sio2_nat, al2o3_nat, tio2_nat, na2o_nat,
 k2o_nat, mgo_nat, cao_nat, feo_nat) = (chem_nat[ox]/chem_nat_tot for ox in _ox_nat)
ifst_nat = sio2_nat*ifs_si + 2*al2o3_nat*ifs_al + tio2_nat*ifs_ti
ifsm_nat = 2*na2o_nat*ifs_na + 2*k2o_nat*ifs_k + cao_nat*ifs_ca + mgo_nat*ifs_mg + feo_nat*ifs_fe2
#
#### DEFINITION OF THE VARIABLE THAT IS USED IN CALIBRATION
#
# The modifier-population ionic field strength is the chemistry proxy
IFS_MT_std = ifsm_std
IFS_MT_nat = ifsm_nat
print("\nCorrelation coefficient between IFS(M) and I_930:")
print(np.corrcoef(IFS_MT_nat, I930_nat))
# Two linear fits: I930 vs Fe3+ on the standards (K01) and
# I930 vs IFS(M) on the natural glasses (K02), with correlated
# parameter uncertainties propagated via the uncertainties package.
popt_K01, pcov_K01 = op.curve_fit(rp.linear, fe3_standards, I930_std)
popt_K02, pcov_K02 = op.curve_fit(rp.linear, IFS_MT_nat, I930_nat)
params_K01 = uncertainties.correlated_values(popt_K01, pcov_K01)
params_K02 = uncertainties.correlated_values(popt_K02, pcov_K02)
# K0: intercept of K02, shifted so both calibrations agree at the
# IFS(M) of the first standard
K0_ = params_K02[0] - (rp.linear(IFS_MT_std[0], *params_K02) - params_K01[0])
K1_ = params_K01[1]  # slope of the Fe3 vs I930 relationship
K2_ = params_K02[1]  # slope of the IFS vs I930 relationship
print("\nParameters of eq. 6 are:")
print("K0", K0_)
print("K1", K1_)
print("K2", K2_)
#
# Figure 9
#
# construct the isoredox curves: a grid of IFS(M) values and of redox
# ratios at which eq. 6 will be drawn as dashed lines
model_ifs = np.arange(80, 100,1.0)
model_redox = np.arange(0,1.1,0.1)
plt.figure(figsize=(3.22,3.22),dpi=300)
# natural glasses: one open black point per sample (mean +/- std of the
# 5 replicate spectra)
for i in range(0,len(IFS_MT_nat),5):
plt.errorbar(np.mean(IFS_MT_nat[i:i+5]),
np.mean(I930_nat[i:i+5]),
yerr=np.std(I930_nat[i:i+5],ddof=1),fmt=".",color="k",mfc="none")
# standards: same per-sample averaging, coloured by their Fe3+/FeTOT
for i in range(0,len(IFS_MT_std),5):
plt.errorbar(np.mean(IFS_MT_std[i:i+5]),
np.mean(I930_std[i:i+5]),
yerr=np.std(I930_std[i:i+5],ddof=1),
fmt=".",
color=s_m.to_rgba(fe3_standards[i]))
# isoredox lines: eq. 6 evaluated at each fixed redox value
for i in range(len(model_redox)):
plt.plot(model_ifs,
un.nominal_values((K0_+model_redox[i]*K1_) + K2_*model_ifs),
"--",
color=s_m.to_rgba(model_redox[i])) # calling the ScalarMappable that was initialised with c_m and norm
c_bar = plt.colorbar(s_m) # we plot the colorbar, using again our ScalarMappable
c_bar.set_label(r"Fe$^{3+}$/Fe\textsuperscript{TOT}")
plt.xticks([80,85,90,95,100])
plt.xlim(80,100)
plt.ylim(0.6,1.0)
plt.xlabel(r"IFS(M), nm$^{-2}$")
plt.ylabel(r"I$_{930}$, normalised")
plt.tight_layout()
plt.savefig("./figures/Figure9.pdf")
print("\nChemistry-corrected Raman redox of natural MORBs is now:")
# invert eq. 6 for the redox ratio: Fe3+/FeTOT = (I930 - K2*IFS - K0)/K1
fe_nat_corrected = un.nominal_values((I930_nat-K2_*IFS_MT_nat - K0_)/K1_)
print("Mean: %.3f" % np.mean(fe_nat_corrected))
print("Std: %.3f"% np.std(fe_nat_corrected,ddof=1))
In [40]:
#
# train-test splitting: rp.chemical_splitting groups spectra so that all
# replicates of a sample fall on the same side of the split
#
train_nat, test_nat, idxtrain_nat, idxtest_nat = rp.chemical_splitting(liste_natural,'sio2',split_fraction=0.3)
# targets are the 8 oxide contents; inputs are the spectra (one column
# per spectrum, hence the transposes below)
y_train_nat = liste_natural.loc[idxtrain_nat[0],["sio2","al2o3","tio2","feo","mgo","cao","na2o","k2o"]]
X_train_nat = y_natural[:,idxtrain_nat[0]]
y_test_nat = liste_natural.loc[idxtest_nat[0],["sio2","al2o3","tio2","feo","mgo","cao","na2o","k2o"]]
X_test_nat = y_natural[:,idxtest_nat[0]]
#
# scaling
#
# NOTE(review): sklearn.preprocessing is reached through the bare
# `import sklearn` of the header — confirm the submodule is importable
# in the installed sklearn version.
X_scaler_nat = sklearn.preprocessing.StandardScaler()
Y_scaler_nat = sklearn.preprocessing.StandardScaler()
X_scaler_nat.fit(X_train_nat.T)
# Y_scaler_nat is fitted but not applied in this excerpt (the raw
# y_train_nat is used for the fit below)
Y_scaler_nat.fit(y_train_nat)
# scaling the data in all cases, it may not be used during the fit later
X_train_nat_sc = X_scaler_nat.transform(X_train_nat.T)
X_test_nat_sc = X_scaler_nat.transform(X_test_nat.T)
#
# parameters for neural net
#
# 1 hidden layer of 20 neurons, relu activation, lbfgs solver, no early
# stopping (an earlier comment here claimed tanh + early stopping, which
# did not match these settings)
prm_nn_nat = dict(hidden_layer_sizes=(20,),solver = 'lbfgs',
alpha=.1,activation='relu',max_iter=1000)
#
# declaring our neural net object and the bagging regressor, see example at https://github.com/charlesll/rampy/blob/master/examples/Machine%20Learning%20Regression.ipynb
#
nn_ = MLPRegressor(**prm_nn_nat)
#
# TRAINING
#
nn_.fit(X_train_nat_sc,y_train_nat)
#
# PREDICTIONS
#
y_test_nat_pred = nn_.predict(X_test_nat_sc)
y_train_nat_pred = nn_.predict(X_train_nat_sc)
In [41]:
# Panel labels and axis limits (mole fractions) for the 8 predicted oxides
list_chemicals = [r"A) SiO$_2$",r"B) Al$_2$O$_3$",r"C) TiO$_2$",r"D) FeO$\textsubscript{TOT}$",
r"E) MgO",r"F) CaO",r"G) Na$_2$O",r"H) K$_2$O"]
low_lims = [0.49,0.08,0.00,0.06,0.08,0.10,0.01,0.]
high_lims = [0.57,0.115,0.020,0.14,0.16,0.17,0.04,0.004]
# Measured vs predicted composition, one subplot per oxide; replicate
# groups of 5 spectra are averaged, the spread giving the error bars.
# Circles = training set, stars = test set; dashed line is the 1:1.
plt.figure(figsize=(5,8))
for i in range(8):
plt.subplot(4,2,i+1)
for j in range(0,y_train_nat_pred.shape[0],5):
plt.errorbar(y_train_nat.iloc[j,i]*100,
np.mean(y_train_nat_pred[j:j+5,i]*100),
yerr=np.std(y_train_nat_pred[j:j+5,i]*100,ddof=1),
fmt="o",color="k",mfc="none",ms=5)
for j in range(0,y_test_nat_pred.shape[0],5):
plt.errorbar(y_test_nat.iloc[j,i]*100,
np.mean(y_test_nat_pred[j:j+5,i]*100),
yerr=np.std(y_test_nat_pred[j:j+5,i]*100,ddof=1),
fmt="*",color="k",mfc="none")
plt.plot([0,100],[0,100],"k--")
# values are multiplied by 100 throughout, so limits/labels are in mol%
plt.xlim(low_lims[i]*100,high_lims[i]*100)
plt.ylim(low_lims[i]*100,high_lims[i]*100)
plt.annotate(list_chemicals[i],xy=(0.1,0.8),xycoords="axes fraction")
# x labels only on the bottom row, y labels only on the left column
if i == 6 or i == 7:
plt.xlabel("Measured, mol\%")
if i % 2 == 0:
plt.ylabel("Predicted, mol\%")
plt.tight_layout()
In [46]:
# Re-train the network on the scaled training spectra and report the
# root-mean-squared error on both splits.
nn_.fit(X_train_nat_sc, y_train_nat)
#
# PREDICTIONS
#
y_test_nat_pred = nn_.predict(X_test_nat_sc)
y_train_nat_pred = nn_.predict(X_train_nat_sc)
# RMSE in mole fraction units
rmse_train = np.sqrt(mean_squared_error(y_train_nat_pred, y_train_nat))
rmse_test = np.sqrt(mean_squared_error(y_test_nat_pred, y_test_nat))
print("Training error is: %.5f" % rmse_train)
print("Testing error is: %.5f" % rmse_test)
In [53]:
# we re-do the figure with the newly trained network
# we add errors also in the subplots
# tick positions per oxide subplot, in mol%
ticks = (np.arange(50,58,2.),np.arange(8,12,1),np.arange(0,2.5,0.5),
np.arange(6,15,2),np.arange(8,18,2),np.arange(10,18,2),
np.arange(1,5,1),np.arange(0,0.5,0.1))
plt.figure(figsize=(5,8),dpi=300)
# One subplot per oxide: replicate groups of 5 averaged, circles for the
# training set, stars for the test set, dashed 1:1 line, and the RMSE of
# each split annotated inside the panel.
for i in range(8):
ax = plt.subplot(4,2,i+1)
for j in range(0,y_train_nat_pred.shape[0],5):
plt.errorbar(y_train_nat.iloc[j,i]*100,
np.mean(y_train_nat_pred[j:j+5,i]*100),
yerr=np.std(y_train_nat_pred[j:j+5,i]*100,ddof=1),
fmt="o",color="k",mfc="none",ms=5)
for j in range(0,y_test_nat_pred.shape[0],5):
plt.errorbar(y_test_nat.iloc[j,i]*100,
np.mean(y_test_nat_pred[j:j+5,i]*100),
yerr=np.std(y_test_nat_pred[j:j+5,i]*100,ddof=1),
fmt="*",color="k",mfc="none")
plt.plot([0,100],[0,100],"k--")
# last panel (K2O) gets its label placed differently to avoid the data
if i < 7:
plt.annotate(list_chemicals[i],xy=(0.1,0.8),xycoords="axes fraction")
plt.annotate("$\sigma$(train): {:.2f}".format(100*np.sqrt(mean_squared_error(y_train_nat_pred[:,i],y_train_nat.iloc[:,i]))),xy=(0.95,0.2),xycoords="axes fraction",ha="right")
plt.annotate("$\sigma$(test): {:.2f}".format(100*np.sqrt(mean_squared_error(y_test_nat_pred[:,i],y_test_nat.iloc[:,i]))),xy=(0.95,0.1),xycoords="axes fraction",ha="right")
else:
plt.annotate(list_chemicals[i],xy=(0.95 ,0.3),xycoords="axes fraction",ha="right")
plt.annotate("$\sigma$(train): {:.2f}".format(100*np.sqrt(mean_squared_error(y_train_nat_pred[:,i],y_train_nat.iloc[:,i]))),xy=(0.95,0.2),xycoords="axes fraction",ha="right")
plt.annotate("$\sigma$(test): {:.2f}".format(100*np.sqrt(mean_squared_error(y_test_nat_pred[:,i],y_test_nat.iloc[:,i]))),xy=(0.95,0.1),xycoords="axes fraction",ha="right")
if i == 6 or i == 7:
plt.xlabel("Measured, mol\%")
if i % 2 == 0:
plt.ylabel("Predicted, mol\%")
ax.get_yaxis().set_label_coords(-0.22,0.5) # to be sure y axis are at the same level
# NOTE(review): this import would conventionally live at the top of the
# file rather than inside the loop; harmless but repeated every iteration
from matplotlib.ticker import FormatStrFormatter
ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))
plt.xlim(low_lims[i]*100,high_lims[i]*100)
plt.ylim(low_lims[i]*100,high_lims[i]*100)
plt.xticks(ticks[i])
plt.yticks(ticks[i])
plt.tight_layout()
plt.savefig("./figures/Figure10.pdf")
In [ ]:
# Export the standards' Fe3+/FeTOT estimates of every method to a CSV
# table (one column per method).
fe3_out_standards = {
    'Intensity': intensity_results[:,2],
    'Mixing': Fox_results[:,0],
    'ALS MCR': conc_als_mcr_mean[:,2],
    'NeuralNets': nn_all_mean,
    'KernelRidge': kr_all_mean,
    'SVM': svr_all_mean,
}
df_out = pd.DataFrame(data=fe3_out_standards)
df_out.to_csv('out_table1.csv')
In [ ]:
# Per-sample summary of the chemistry-corrected redox of the natural
# glasses: mean over the replicates, with the replicate standard
# deviation reported as the uncertainty.
liste_nat_out = liste_natural.copy()
liste_nat_out["Fe3+/FeTOT"] = fe_nat_corrected
liste_nat_out_std = liste_nat_out.copy()
l_o = liste_nat_out.groupby("sample").aggregate("mean")
l_o["ese Fe3+/FeTOT"] = liste_nat_out_std.groupby("sample").aggregate("std").loc[:,"Fe3+/FeTOT"]
l_o.to_csv("TableS1.csv")
In [ ]: