import pandas as pd
import numpy as np

import latools as la

from comparison_tools import helpers, stats_zircon, plots_zircon

import matplotlib.pyplot as plt
%matplotlib inline

Data Collection Parameters

See Burnham & Berry (2016).

Data Format Description

# Description of the raw data file layout, handed to la.analyse() below so it
# can parse the instrument's comma-separated export files.
dataformat = {
    # Options for reading the numeric data table: comma-delimited, skipping
    # 4 header lines and 3 footer lines (presumably forwarded to
    # numpy.genfromtxt -- confirm against the latools documentation).
    'genfromtext_args': {'delimiter': ',',
                         'skip_header': 4,
                         'skip_footer': 3},
    # Where the column names live and how analyte names are recognised.
    'column_id': {'name_row': 3,
                  'delimiter': ',',
                  'timecolumn': 0,
                  # One or two letters followed by one to three digits,
                  # e.g. 'Si29'. [A-Za-z] is used because the range [A-z]
                  # also spans the ASCII punctuation between 'Z' and 'a'
                  # ('[', '\\', ']', '^', '_', '`').
                  'pattern': '([A-Za-z]{1,2}[0-9]{1,3})'},
    # Metadata captured from header line 2 by regex: the two groups are
    # stored under the names 'date' and 'method'.
    'meta_regex': {2: (['date', 'method'],
                       '([0-9/]+ [0-9:]+ [AMP]+) using (.*?),')},
}

Load Data

# Create the latools analysis object from the files in raw_data/zircon/,
# parsed according to `dataformat`. 'NIST' is the identifier for standard
# (SRM) files and Si29 is the internal standard (presumably the analyte all
# others are later ratioed to -- confirm against the latools documentation).
dat = la.analyse('raw_data/zircon/', dataformat=dataformat,
                 srm_identifier='NIST', internal_standard='Si29')

Starting analysis using "UCD-AGILENT" configuration:
  34 Data Files Loaded: 6 standards, 28 samples
  Analytes: Li7 Mg24 Al27 Si29 P31 Ca43 Ti49 Y89 La139 Ce140 Pr141 Nd146 Sm147 Eu153 Gd157 Tb159 Dy163 Ho165 Er166 Tm169 Yb172 Lu175 Hf177 Pb206 Th232 U238
  Internal Standard: Si29

# Sample used for the diagnostic trace plots throughout this notebook.
sample = '119'
# Trace plot of the raw data for that sample. The per-sample object is looked
# up by name in `dat.data`; calling .tplot() on the bare list `[sample]`
# raises AttributeError.
_ = dat.data[sample].tplot()

Data processing

Despiking: 100%|██████████| 34/34 [00:00<00:00, 805.26it/s]

# Trace plot of the example sample after despiking. The per-sample object is
# looked up in `dat.data`; a bare list has no .tplot() method.
_ = dat.data[sample].tplot()

# Automatically identify the ranges in each file (shown by ranges=True in the
# trace plots that follow). on_mult / off_mult are (1.5, 1) and (1., 3.);
# presumably they scale how much data is excluded around each transition --
# TODO confirm against the latools autorange documentation.
dat.autorange(on_mult=(1.5, 1), off_mult=(1., 3.))

AutoRange: 100%|██████████| 34/34 [00:00<00:00, 66.13it/s]

# Trace plot of the example sample with the autorange regions drawn. The
# per-sample object is looked up in `dat.data`; a bare list has no .tplot().
_ = dat.data[sample].tplot(ranges=True)

Background Subtraction: 100%|██████████| 34/34 [00:01<00:00, 17.59it/s]

# Trace plot of the example sample after background subtraction. The
# per-sample object is looked up in `dat.data`; a bare list has no .tplot().
_ = dat.data[sample].tplot(ranges=True)

Ratio Calculation: 100%|██████████| 34/34 [00:00<00:00, 64.51it/s]

# Trace plot of the example sample after ratio calculation. The per-sample
# object is looked up in `dat.data`; a bare list has no .tplot().
_ = dat.data[sample].tplot(ranges=True)

Applying Calibrations: 100%|██████████| 34/34 [00:02<00:00, 19.36it/s]

# Plot the calibrations applied above; ncol=4 presumably sets the number of
# panel columns in the figure (confirm against the latools documentation).
# The return value is discarded.
_ = dat.calibration_plot(ncol=4)

# Trace plot of the example sample after calibration. The per-sample object
# is looked up in `dat.data`; a bare list has no .tplot().
_ = dat.data[sample].tplot(ranges=True)


Sample Info:

Zircon Si wt%: ~16

Contaminant Indicators:

  • Ca
  • La (< 320 ppb)
  • Ti (< 12 ppm)
  • Mg
  • Al

array(['Li7', 'Mg24', 'Al27', 'Si29', 'P31', 'Ca43', 'Ti49', 'Y89',
       'La139', 'Ce140', 'Pr141', 'Nd146', 'Sm147', 'Eu153', 'Gd157',
       'Tb159', 'Dy163', 'Ho165', 'Er166', 'Tm169', 'Yb172', 'Lu175',
       'Hf177', 'Pb206', 'Th232', 'U238'], dtype='<U5')

# Analyte subsets reused by the plots below.
# Contamination indicators (La, Ti, Mg and Al):
contam = [
    'La139',
    'Ti49',
    'Mg24',
    'Al27',
]
# A few elements of interest:
interest = [
    'Pb206',
    'Hf177',
    'Nd146',
]

# Trace plot of the example sample restricted to the contaminant-indicator
# analytes. The per-sample object is looked up in `dat.data`; a bare list
# has no .tplot() method.
_ = dat.data[sample].tplot(contam, ranges=True)

# Same plot for the elements of interest.
_ = dat.data[sample].tplot(interest, ranges=True)

This shows us that some of the elements we're interested in (e.g. Nd) are influenced by contamination.

Data Overview

# Scatter-mode crossplot of the contaminant indicators and elements of
# interest, with filters not applied (filt=False).
fig, axs = dat.crossplot(contam + interest, mode='scatter', filt=False)

from latools.helpers.chemistry import to_molar_ratio

# Contaminant thresholds expressed as molar ratios to Si.
# Both use the zircon Si content (~16 wt%) and the value 28.09 for Si
# (presumably its atomic mass -- confirm against to_molar_ratio's signature).
_si_frac = 16e-2
_si_mass = 28.09

# La: 10x the 320 ppb contaminant limit.
La_thresh = to_molar_ratio(320e-9, _si_frac, 138.9, _si_mass) * 10
# Ti: 5x the 12 ppm contaminant limit.
Ti_thresh = to_molar_ratio(12e-6, _si_frac, 47.9, _si_mass) * 5

# Create threshold filters on La139 at La_thresh. The filter status table
# printed later lists the resulting 'La139_thresh_below' and
# 'La139_thresh_above' filters.
dat.filter_threshold('La139', La_thresh)

Threshold Filter: 100%|██████████| 28/28 [00:00<00:00, 13738.95it/s]

# dat.filter_clear()
# Create threshold filters on Ti49 at Ti_thresh. The filter status table
# printed later lists the resulting 'Ti49_thresh_below' and
# 'Ti49_thresh_above' filters.
dat.filter_threshold('Ti49', Ti_thresh)

Threshold Filter: 100%|██████████| 28/28 [00:00<00:00, 10492.32it/s]

# Switch on the two "below threshold" filters. Only the second call prints
# the filter status table (show_status=True), so the table reflects both.
dat.filter_on('Ti49_thresh_below', show_status=False)
dat.filter_on('La139_thresh_below', show_status=True)

Subset: All Samples

n  Filter Name           Li7    Mg24   Al27   Si29   P31    Ca43   Ti49   Y89    La139  Ce140  Pr141  Nd146  Sm147  Eu153  Gd157  Tb159  Dy163  Ho165  Er166  Tm169  Yb172  Lu175  Hf177  Pb206  Th232  U238   
0  La139_thresh_below    True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   
1  La139_thresh_above    False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
2  Ti49_thresh_below     True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   
3  Ti49_thresh_above     False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  

# Print a per-sample summary of the active filters. The printed columns are
# 'tot', 'flt' and '%rm' (presumably total points, points kept by the
# filters, and percent removed -- confirm). The return value is discarded.
_ = dat.filter_nremoved()

Sample     tot flt %rm 
117          63  23  63
119          60  52  13
120          63  51  19
121          61  42  31
126          64   0 100
Temora-5     65  65   0
127          63  63   0
128          62  62   0
131          64  64   0
134          66  59  11
135          64  64   0
Temora-6     63  63   0
139          63  60   5
140          65  65   0
141          62  62   0
143          64  55  14
144          62  55  11
146          61  53  13
147          63  63   0
148          65  65   0
149          64  13  80
150          64  58   9
151          62  62   0
152          61  61   0
153          65  61   6
154          64  64   0
155          63  63   0
157          63  63   0

# Same scatter-mode crossplot as before, now with the active filters applied
# (filt=True).
fig, axs = dat.crossplot(contam + interest, mode='scatter', filt=True)

Drawing Plots: 100%|██████████| 21/21 [00:00<00:00, 111.08it/s]

# Trace plot of the contaminant indicators for the example sample with the
# threshold filters applied. The per-sample object is looked up in
# `dat.data`; a bare list has no .tplot() method.
_ = dat.data[sample].tplot(contam, filt=True)

This has done a pretty good job of removing the bad data at the end, but the contamination at the start is still included.

'Filter Defragmentation'

# Defragment the active filters with threshold 4 in 'exclude' mode; the
# status table printed later lists the result as 'defrag_exclude_4'.
# NOTE(review): presumably this removes selected fragments of <= 4 points --
# confirm against the latools filter_defragment documentation.
dat.filter_defragment(4, 'exclude')

Signal Optimisation

Signal optimisation selects the longest contiguous data region in which the means and standard deviations of the target elements are minimised.

# Signal optimisation targeting Ti49, La139 and Al27; the status table
# printed later lists the result as 'optimise_Ti49_La139_Al27'.
# threshold_mode and x_bias are passed through unchanged; see the latools
# optimise_signal documentation for their meaning.
dat.optimise_signal(['Ti49', 'La139', 'Al27'], threshold_mode='kde_first_max', x_bias=0.2)

  *** Check Optimisation Plots ***

Drawing Plots: 100%|██████████| 28/28 [00:15<00:00,  1.68it/s]

# Switch on the optimisation filter and print the filter status table.
# 'optim' is presumably matched against filter names as a partial string
# (confirm); the table below shows only 'optimise_Ti49_La139_Al27' enabled.
dat.filter_on('optim', show_status=True)

Subset: All Samples

n  Filter Name                 Li7    Mg24   Al27   Si29   P31    Ca43   Ti49   Y89    La139  Ce140  Pr141  Nd146  Sm147  Eu153  Gd157  Tb159  Dy163  Ho165  Er166  Tm169  Yb172  Lu175  Hf177  Pb206  Th232  U238   
0  La139_thresh_below          False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
1  La139_thresh_above          False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
2  Ti49_thresh_below           False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
3  Ti49_thresh_above           False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
4  defrag_exclude_4            False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
5  optimise_Ti49_La139_Al27    True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   

# Trace plot of the contaminant indicators for the example sample with the
# optimisation filter applied. The per-sample object is looked up in
# `dat.data`; a bare list has no .tplot() method.
_ = dat.data[sample].tplot(contam, filt=True)

Done a pretty good job!

Be conservative: Filter Trimming

# trim filter edges, to be conservative
# (start=1, end=1: presumably one point is dropped from each end of every
# selected region -- confirm against the latools filter_trim documentation)
dat.filter_trim(start=1, end=1)

Effect of all filtering

# Scatter-mode crossplot of the contaminant indicators only, with all active
# filters applied (filt=True).
fig, axs = dat.crossplot(contam, mode='scatter', filt=True)

Drawing Plots: 100%|██████████| 6/6 [00:00<00:00, 161.81it/s]

# f, a = dat.gradient_crossplot(contam, mode='scatter', win=5, filt=True)

Save minimal export

This produces a .zip file containing all the raw data, the SRM values you used, and a record of everything you did to the data.

The zip file can be opened with latools.reproduce to recreate your entire analysis.

Comparison to Reference Data

import re

import numpy as np
import pandas as pd

from latools.helpers.chemistry import elements, calc_M, to_mass_fraction

Load Reference Data

# Load the reference dataset named 'zircon_reference' via the project helper
# (presumably returns a pandas DataFrame with a multi-level index -- the
# next statement relies on an index level called 'rep').
rd = helpers.load_reference_data('zircon_reference')

# Drop the 'rep' level from the index.
rd.index = rd.index.droplevel('rep')

In [45]:
# 'H15' stats are Huber 'robust' statistics, which recursively remove outliers
# Two statistics are requested: 'H15_mean' and 'H15_se'; both are selected by
# name from the stats table further down.
dat.sample_stats(stats=['H15_mean', 'H15_se'])

Calculating Stats: 100%|██████████| 34/34 [00:07<00:00,  4.57it/s]

# Fetch the statistics calculated by sample_stats() as a table.
las = dat.getstats()

# remove 'replicate' from index
las.index = las.index.droplevel('rep')

# convert lead 206 to total lead
# `els` is the element table re-indexed by isotope, so row 206 is looked up
# for its 'percent' value.
els = elements().set_index('isotope')

# NOTE(review): this divides by (100 / percent), i.e. multiplies Pb206 by
# the fractional value of 'percent'. Confirm that this is the intended
# direction for a "206 -> total" conversion given how the data were calibrated.
las.loc[:,'Pb206'] /= (100 / els.loc[206, 'percent'])

In [49]:
# rename columns to element names by keeping only the leading letters
# (e.g. 'Pb206' -> 'Pb').
# [A-Za-z] is used because the range [A-z] also spans the ASCII punctuation
# between 'Z' and 'a' ('[', '\', ']', '^', '_', '`').
ename = re.compile('[A-Za-z]+')
las.columns = [ename.match(c).group() for c in las.columns]

# load mass list
# (with all_isotopes=False, `els` is indexed directly by element symbol in
# the statements below)
els = elements(all_isotopes=False)

# calculate wt% Si from wt% SiO2
# (scales SiO2 by els['Si'] / calc_M('SiO2') -- presumably the atomic mass
# of Si over the molar mass of SiO2; TODO confirm)
rd.loc[:, 'wtpcSi'] = rd.loc[:, 'SiO2'] * els['Si'] / calc_M('SiO2')

# transfer wt% Si to latools data
# (inner join: only index entries present in both tables are kept)
la_ppm = las.join(rd.loc[:, 'wtpcSi'], how='inner')

# calculate mass fractions
# Each column of `las` is passed through to_mass_fraction together with the
# Si content (wt% scaled by 1e-2 to a fraction) and the `els` entries for
# that element and for Si. The result overwrites the same column of la_ppm.
for c in las.columns:
    la_ppm.loc[:, c] = to_mass_fraction(las.loc[:, c], la_ppm.loc[:, 'wtpcSi'] * 1e-2, 
                                        els[ename.match(c).group()], els['Si'])

# convert Al to wt% Al2
# NOTE(review): the comment above looks truncated (Al2O3?) and the factor
# 2 * 1e-4 is not explained here -- confirm the intended conversion.
la_ppm.loc[:, 'Al'] *= (2 * 1e-4)

# isolate means
# The 'H15_mean' rows are joined with the 'H15_se' rows (the latter suffixed
# '_err'), and every column is then multiplied by 1e6.
lad = la_ppm.loc['H15_mean',:].join(la_ppm.loc['H15_se', :], rsuffix='_err') * 1e6  # convert to ppm

# combine data
# Columns present in both tables are disambiguated with the '_la' (latools)
# and '_rd' (reference data) suffixes.
comp = lad.join(rd, lsuffix='_la', rsuffix='_rd')

from comparison_tools.plots import get_panel_bounds

In [59]:
# Draw the comparison figure from the combined latools / reference table
# using the project's bland_altman_plots helper.
fig, axs = plots_zircon.bland_altman_plots(comp)

In [60]:
# Save the comparison figure as a 200 dpi PNG.
# NOTE: matplotlib does not create missing directories, so 'Figures/' must
# already exist.
fig.savefig('Figures/zircon_comparison.png', dpi=200)

# Compute comparison statistics from the combined latools / reference table
# using the project's comparison_stats helper (return type not visible here).
stat = stats_zircon.comparison_stats(comp)

In [62]: