In [1]:
import pandas as pd
import numpy as np

import latools as la

from comparison_tools import helpers, stats_zircon, plots_zircon

import matplotlib.pyplot as plt
%matplotlib inline

Data Collection Parameters

See Burnham & Berry (2016).

Data Format Description


In [2]:
# Data format description for latools.analyse:
#  - genfromtext_args: passed straight to np.genfromtxt (CSV, 4 header /
#    3 footer lines skipped)
#  - column_id: how analyte columns are identified in header row 3
#  - meta_regex: pulls acquisition date/method out of file line 2
dataformat = {'genfromtext_args': {'delimiter': ',',
                                   'skip_header': 4,
                                   'skip_footer': 3},
              'column_id': {'name_row': 3,
                            'delimiter': ',',
                            'timecolumn': 0,
                            # [A-Za-z] rather than [A-z]: the latter also
                            # matches the ASCII characters [ \ ] ^ _ `
                            # that sit between 'Z' and 'a' — a classic
                            # regex character-class bug.
                            'pattern': '([A-Za-z]{1,2}[0-9]{1,3})'},
              'meta_regex': {2: (['date', 'method'],
                                 '([0-9/]+ [0-9:]+ [AMP]+) using (.*?),')}
             }

Load Data


In [3]:
# Load all ablation files in the folder; filenames containing 'NIST' are
# treated as SRM standards, and all analytes are ratioed to Si29.
dat = la.analyse('raw_data/zircon/', dataformat=dataformat,
                 srm_identifier='NIST', internal_standard='Si29')


Loading Data: 100%|██████████| 34/34 [00:00<00:00, 179.52it/s]
Starting analysis using "UCD-AGILENT" configuration:
  34 Data Files Loaded: 6 standards, 28 samples
  Analytes: Li7 Mg24 Al27 Si29 P31 Ca43 Ti49 Y89 La139 Ce140 Pr141 Nd146 Sm147 Eu153 Gd157 Tb159 Dy163 Ho165 Er166 Tm169 Yb172 Lu175 Hf177 Pb206 Th232 U238
  Internal Standard: Si29


In [4]:
# Pick one sample to illustrate each processing step; plot its raw traces.
sample = '119'
_ = dat.data[sample].tplot()


Data processing


In [5]:
# Remove single-point signal spikes from all samples.
dat.despike()


Despiking: 100%|██████████| 34/34 [00:00<00:00, 805.26it/s]

In [6]:
# Re-plot the example sample to check the despiked traces.
_ = dat.data[sample].tplot()



In [7]:
# Identify signal vs. background regions; on_mult/off_mult pad the
# transitions at ablation start/end (see latools docs for units).
dat.autorange(on_mult=(1.5, 1), off_mult=(1., 3.))


AutoRange: 100%|██████████| 34/34 [00:00<00:00, 66.13it/s]

In [8]:
# ranges=True overlays the identified signal/background regions.
_ = dat.data[sample].tplot(ranges=True)



In [9]:
# Model the background as a gaussian-weighted moving average of the
# background regions (weight_fwhm=800, in the data's time units).
dat.bkg_calc_weightedmean(weight_fwhm=800)

In [10]:
# Inspect the calculated background against the measured background data.
dat.bkg_plot()


Plotting backgrounds: 100%|██████████| 26/26 [00:01<00:00, 12.83it/s]
Out[10]:
(<Figure size 540x360 with 1 Axes>,
 <matplotlib.axes._axes.Axes at 0x7f54f017e278>)

In [11]:
# Subtract the modelled background from the signal regions.
dat.bkg_subtract()


Background Subtraction: 100%|██████████| 34/34 [00:01<00:00, 17.59it/s]

In [12]:
# Check the example sample's traces after background subtraction.
_ = dat.data[sample].tplot(ranges=True)



In [13]:
# Convert all analytes to ratios relative to the internal standard (Si29).
dat.ratio()


Ratio Calculation: 100%|██████████| 34/34 [00:00<00:00, 64.51it/s]

In [14]:
# Check the ratioed traces.
_ = dat.data[sample].tplot(ranges=True)



In [15]:
# Calibrate the ratios to molar ratios using the NIST610 glass standard.
dat.calibrate(srms_used='NIST610')


Applying Calibrations: 100%|██████████| 34/34 [00:02<00:00, 19.36it/s]

In [16]:
# Inspect the calibration for each analyte, 4 panels per row.
_ = dat.calibration_plot(ncol=4)



In [17]:
# Calibrated traces for the example sample.
_ = dat.data[sample].tplot(ranges=True)


Filtering

Sample Info:

Zircon Si wt%: ~16

Contaminant Indicators:

  • Ca
  • La (< 320 ppb)
  • Ti (< 12 ppm)
  • Mg
  • Al

In [18]:
# List the analytes measured in this session.
dat.analytes


Out[18]:
array(['Li7', 'Mg24', 'Al27', 'Si29', 'P31', 'Ca43', 'Ti49', 'Y89',
       'La139', 'Ce140', 'Pr141', 'Nd146', 'Sm147', 'Eu153', 'Gd157',
       'Tb159', 'Dy163', 'Ho165', 'Er166', 'Tm169', 'Yb172', 'Lu175',
       'Hf177', 'Pb206', 'Th232', 'U238'], dtype='<U5')

In [19]:
# Subsets of analytes: contamination indicators (see the list above) and
# a few elements of scientific interest, used in the plots below.
contam = ['La139', 'Ti49', 'Mg24', 'Al27']
interest = ['Pb206', 'Hf177', 'Nd146']

In [20]:
# Traces of the contamination indicators for the example sample.
_ = dat.data[sample].tplot(contam, ranges=True)



In [21]:
# Traces of the elements of interest for the example sample.
_ = dat.data[sample].tplot(interest, ranges=True)


This shows us that some of the elements we're interested in (e.g. Nd) are influenced by contamination.

Data Overview


In [22]:
# Pairwise scatter of contaminant and interest elements, before filtering.
fig, axs = dat.crossplot(contam + interest, mode='scatter', filt=False)


Drawing Plots: 100%|██████████| 21/21 [00:00<00:00, 113.93it/s]

Thresholds


In [23]:
from latools.helpers.chemistry import to_molar_ratio

In [24]:
# Calculate molar-ratio thresholds from the contaminant limits above
# (La < 320 ppb, Ti < 12 ppm, in zircon with ~16 wt% Si), using atomic
# masses La 138.9, Ti 47.9, Si 28.09.
# NOTE(review): the trailing *10 and *5 relax the thresholds well above
# the nominal limits — presumably to reject only gross contamination;
# confirm against the intended cut-offs.
La_thresh = to_molar_ratio(320e-9, 16e-2, 138.9, 28.09) * 10
Ti_thresh = to_molar_ratio(12e-6, 16e-2, 47.9, 28.09) * 5

In [25]:
# Start from a clean slate, then flag points above/below the La threshold.
dat.filter_clear()
dat.filter_threshold('La139', La_thresh)


Threshold Filter: 100%|██████████| 28/28 [00:00<00:00, 13738.95it/s]

In [26]:
# filter_clear() is deliberately NOT repeated here, so the Ti filter is
# created alongside the existing La filter rather than replacing it.
dat.filter_threshold('Ti49', Ti_thresh)


Threshold Filter: 100%|██████████| 28/28 [00:00<00:00, 10492.32it/s]

In [27]:
# Activate the 'below threshold' filters (keep points under both limits);
# show_status=True prints the active-filter table once, on the second call.
dat.filter_on('Ti49_thresh_below', show_status=False)
dat.filter_on('La139_thresh_below', show_status=True)


Subset: All Samples

n  Filter Name           Li7    Mg24   Al27   Si29   P31    Ca43   Ti49   Y89    La139  Ce140  Pr141  Nd146  Sm147  Eu153  Gd157  Tb159  Dy163  Ho165  Er166  Tm169  Yb172  Lu175  Hf177  Pb206  Th232  U238   
0  La139_thresh_below    True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   
1  La139_thresh_above    False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
2  Ti49_thresh_below     True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   
3  Ti49_thresh_above     False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  


In [28]:
# Summarise how many points the active filters remove in each sample.
_ = dat.filter_nremoved()


Sample     tot flt %rm 
117          63  23  63
119          60  52  13
120          63  51  19
121          61  42  31
126          64   0 100
Temora-5     65  65   0
127          63  63   0
128          62  62   0
131          64  64   0
134          66  59  11
135          64  64   0
Temora-6     63  63   0
139          63  60   5
140          65  65   0
141          62  62   0
143          64  55  14
144          62  55  11
146          61  53  13
147          63  63   0
148          65  65   0
149          64  13  80
150          64  58   9
151          62  62   0
152          61  61   0
153          65  61   6
154          64  64   0
155          63  63   0
157          63  63   0

In [29]:
# Re-draw the crossplot with the threshold filters applied.
fig, axs = dat.crossplot(contam + interest, mode='scatter', filt=True)


Drawing Plots: 100%|██████████| 21/21 [00:00<00:00, 111.08it/s]

In [30]:
# Contaminant traces for the example sample with the filters applied.
_ = dat.data[sample].tplot(contam, filt=True)


This has done a pretty good job of removing the bad data at the end, but the contamination at the start is still included.

'Filter Defragmentation'


In [31]:
# Exclude contiguous filter-passing regions shorter than 4 points.
dat.filter_defragment(4, 'exclude')

In [32]:
# Switch from the raw threshold filters to the defragmented version.
dat.filter_off()
dat.filter_on('defrag')

Signal Optimisation

Signal optimisation selects the longest contiguous data region in which the means and standard deviations of the target elements are minimised.


In [33]:
# Per sample, find the longest contiguous region minimising the mean and
# std of the contaminant indicators; threshold_mode and x_bias control
# the length/quality trade-off (see latools docs).
dat.optimise_signal(['Ti49', 'La139', 'Al27'], threshold_mode='kde_first_max', x_bias=0.2)


Optimising Data selection: 100%|██████████| 28/28 [00:02<00:00, 10.19it/s]
A Few Problems:
126_1: Optmisation failed. No contiguous data regions longer than 5 points.

  *** Check Optimisation Plots ***


In [34]:
# Visualise the optimisation result for every sample (sample 126 failed above).
dat.optimisation_plots(filt=True)


Drawing Plots: 100%|██████████| 28/28 [00:15<00:00,  1.68it/s]

In [35]:
# Use only the optimisation filter from here on.
dat.filter_off()
dat.filter_on('optim', show_status=True)


Subset: All Samples

n  Filter Name                 Li7    Mg24   Al27   Si29   P31    Ca43   Ti49   Y89    La139  Ce140  Pr141  Nd146  Sm147  Eu153  Gd157  Tb159  Dy163  Ho165  Er166  Tm169  Yb172  Lu175  Hf177  Pb206  Th232  U238   
0  La139_thresh_below          False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
1  La139_thresh_above          False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
2  Ti49_thresh_below           False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
3  Ti49_thresh_above           False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
4  defrag_exclude_4            False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
5  optimise_Ti49_La139_Al27    True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   


In [36]:
# Contaminant traces with the optimisation filter applied.
_ = dat.data[sample].tplot(contam, filt=True)


Done a pretty good job!

Be conservative: Filter Trimming


In [37]:
# Trim one point from each edge of the selected regions, to be conservative.
dat.filter_trim(start=1, end=1)

In [38]:
# Activate the trimmed filter in place of the untrimmed one.
dat.filter_off()
dat.filter_on('trim')

Effect of all filtering


In [39]:
# Final filtered crossplot of the contamination indicators.
fig, axs = dat.crossplot(contam, mode='scatter', filt=True)


Drawing Plots: 100%|██████████| 6/6 [00:00<00:00, 161.81it/s]

In [40]:
# Optional alternative view (left disabled; kept for reference):
# f, a = dat.gradient_crossplot(contam, mode='scatter', win=5, filt=True)

Save minimal export

This produces a .zip file containing all the raw data, the SRM values you used, and a record of everything you did to the data.

The zip file can be opened with latools.reproduce to recreate your entire analysis.


In [41]:
# Save raw data + SRM values + processing log; latools.reproduce can
# rebuild the whole analysis from this zip file.
dat.minimal_export(path='raw_data/zircon_export/minimal_export.zip')

Comparison to Reference Data


In [42]:
import re

import numpy as np
import pandas as pd

from latools.helpers.chemistry import elements, calc_M, to_mass_fraction

Load Reference Data


In [43]:
# Reference values for the same zircons (see Burnham & Berry 2016).
rd = helpers.load_reference_data('zircon_reference')

In [44]:
# Drop the replicate index level so it matches the latools stats below.
rd.index = rd.index.droplevel('rep')

Get LAtools Stats


In [45]:
# Compute per-sample summary statistics over the filtered data.
# 'H15' stats are Huber 'robust' statistics, which recursively remove outliers
dat.sample_stats(stats=['H15_mean', 'H15_se'])


Calculating Stats: 100%|██████████| 34/34 [00:07<00:00,  4.57it/s]

In [46]:
# Collect the computed statistics into a DataFrame.
las = dat.getstats()

In [47]:
# Remove 'replicate' from the index, mirroring the reference data above.
las.index = las.index.droplevel('rep')

In [48]:
# Convert measured Pb206 to total lead using the natural isotopic
# abundance of 206Pb (els.loc[206, 'percent'], in %).
# NOTE(review): `x /= (100 / percent)` multiplies by the fractional
# abundance, whereas converting an isotope amount to a total-element
# amount normally divides by it — confirm the intended direction against
# how latools calibrates Pb206 and how the reference data reports Pb.
els = elements().set_index('isotope')

las.loc[:,'Pb206'] /= (100 / els.loc[206, 'percent'])

Convert Molar Ratio data to ppm


In [49]:
# Rename columns from isotope labels to element names ('Pb206' -> 'Pb').
# [A-Za-z] rather than [A-z]: the latter also matches the ASCII
# characters [ \ ] ^ _ ` between 'Z' and 'a' — a classic regex bug.
ename = re.compile('[A-Za-z]+')
las.columns = [ename.match(c).group() for c in las.columns]

In [50]:
# Load average atomic masses, one entry per element.
# NOTE(review): this rebinds `els`, which previously held the per-isotope
# table — the two objects have different structures.
els = elements(all_isotopes=False)

In [51]:
# Sanity check: average atomic mass of Si (~28.085).
els['Si']


Out[51]:
28.08541284261777

In [52]:
# Calculate wt% Si from the reference data's wt% SiO2, scaling the mass
# fraction by M_Si / M_SiO2.
rd.loc[:, 'wtpcSi'] = rd.loc[:, 'SiO2'] * els['Si'] / calc_M('SiO2')

In [53]:
# Attach wt% Si to the latools data; the inner join keeps only samples
# present in both datasets.
la_ppm = las.join(rd.loc[:, 'wtpcSi'], how='inner')

In [54]:
# Convert each molar ratio (analyte/Si) to a mass fraction, using the
# sample's Si mass fraction (wtpcSi / 100) and the atomic masses.
for c in las.columns:
    la_ppm.loc[:, c] = to_mass_fraction(las.loc[:, c], la_ppm.loc[:, 'wtpcSi'] * 1e-2, 
                                        els[ename.match(c).group()], els['Si'])

In [55]:
# Convert Al to 'wt% Al2'. NOTE(review): the factor of 2 presumably
# matches a two-Al-atoms-per-formula-unit reporting convention in the
# reference data, and 1e-4 rescales so the later *1e6 yields wt% rather
# than ppm for this column — confirm against the reference data's units.
la_ppm.loc[:, 'Al'] *= (2 * 1e-4)

In [56]:
# Isolate the robust means and pair each with its standard error
# (columns suffixed '_err'); *1e6 converts mass fractions to ppm.
lad = la_ppm.loc['H15_mean',:].join(la_ppm.loc['H15_se', :], rsuffix='_err') * 1e6  # convert to ppm

In [57]:
# Combine latools ('_la') and reference ('_rd') values side by side.
comp = lad.join(rd, lsuffix='_la', rsuffix='_rd')

Comparison Plots


In [58]:
from comparison_tools.plots import get_panel_bounds

In [59]:
# Bland-Altman comparison of latools vs. reference values.
fig, axs = plots_zircon.bland_altman_plots(comp)



In [60]:
# Save the comparison figure in vector (PDF) and raster (PNG) form.
fig.savefig('Figures/zircon_comparison.pdf')
fig.savefig('Figures/zircon_comparison.png', dpi=200)

In [61]:
# Summary statistics of the latools vs. reference comparison.
stat = stats_zircon.comparison_stats(comp)

In [62]:
# Write the comparison statistics to CSV.
stat.to_csv('Stats/zircon_stats.csv')