In [1]:
import pandas as pd
import numpy as np

import latools as la

from comparison_tools import helpers, stats_zircon, plots_zircon

import matplotlib.pyplot as plt
%matplotlib inline

Data Collection Parameters

See Burnham & Berry (2016).

Data Format Description


In [2]:
# Data format description for latools.analyse:
#  - genfromtext_args: passed straight to np.genfromtxt (CSV, 4 header /
#    3 footer lines skipped)
#  - column_id: how analyte columns are identified in header row 3
#  - meta_regex: pulls acquisition date/method out of file line 2
dataformat = {'genfromtext_args': {'delimiter': ',',
                                   'skip_header': 4,
                                   'skip_footer': 3},
              'column_id': {'name_row': 3,
                            'delimiter': ',',
                            'timecolumn': 0,
                            # [A-Za-z] rather than [A-z]: the latter also
                            # matches the ASCII characters [ \ ] ^ _ `
                            # that sit between 'Z' and 'a' — a classic
                            # regex character-class bug.
                            'pattern': '([A-Za-z]{1,2}[0-9]{1,3})'},
              'meta_regex': {2: (['date', 'method'],
                                 '([0-9/]+ [0-9:]+ [AMP]+) using (.*?),')}
             }

Load Data


In [3]:
# Load all ablation files in the folder; filenames containing 'NIST' are
# treated as SRM standards, and all analytes are ratioed to Si29.
dat = la.analyse('raw_data/zircon/', dataformat=dataformat,
                 srm_identifier='NIST', internal_standard='Si29')


Loading Data: 100%|██████████| 34/34 [00:00<00:00, 179.52it/s]
Starting analysis using "UCD-AGILENT" configuration:
  34 Data Files Loaded: 6 standards, 28 samples
  Analytes: Li7 Mg24 Al27 Si29 P31 Ca43 Ti49 Y89 La139 Ce140 Pr141 Nd146 Sm147 Eu153 Gd157 Tb159 Dy163 Ho165 Er166 Tm169 Yb172 Lu175 Hf177 Pb206 Th232 U238
  Internal Standard: Si29


In [4]:
# Pick one sample to illustrate each processing step; plot its raw traces.
sample = '119'
_ = dat.data[sample].tplot()


Data processing


In [5]:
# Remove single-point signal spikes from all samples.
dat.despike()


Despiking: 100%|██████████| 34/34 [00:00<00:00, 805.26it/s]

In [6]:
# Re-plot the example sample to check the despiked traces.
_ = dat.data[sample].tplot()



In [7]:
# Identify signal vs. background regions; on_mult/off_mult pad the
# transitions at ablation start/end (see latools docs for units).
dat.autorange(on_mult=(1.5, 1), off_mult=(1., 3.))


AutoRange: 100%|██████████| 34/34 [00:00<00:00, 66.13it/s]

In [8]:
# ranges=True overlays the identified signal/background regions.
_ = dat.data[sample].tplot(ranges=True)



In [9]:
# Model the background as a gaussian-weighted moving average of the
# background regions (weight_fwhm=800, in the data's time units).
dat.bkg_calc_weightedmean(weight_fwhm=800)

In [10]:
# Inspect the calculated background against the measured background data.
dat.bkg_plot()


Plotting backgrounds: 100%|██████████| 26/26 [00:01<00:00, 12.83it/s]
Out[10]:
(<Figure size 540x360 with 1 Axes>,
 <matplotlib.axes._axes.Axes at 0x7f54f017e278>)

In [11]:
# Subtract the modelled background from the signal regions.
dat.bkg_subtract()


Background Subtraction: 100%|██████████| 34/34 [00:01<00:00, 17.59it/s]

In [12]:
# Check the example sample's traces after background subtraction.
_ = dat.data[sample].tplot(ranges=True)



In [13]:
# Convert all analytes to ratios relative to the internal standard (Si29).
dat.ratio()


Ratio Calculation: 100%|██████████| 34/34 [00:00<00:00, 64.51it/s]

In [14]:
# Check the ratioed traces.
_ = dat.data[sample].tplot(ranges=True)



In [15]:
# Calibrate the ratios to molar ratios using the NIST610 glass standard.
dat.calibrate(srms_used='NIST610')


Applying Calibrations: 100%|██████████| 34/34 [00:02<00:00, 19.36it/s]

In [16]:
# Inspect the calibration for each analyte, 4 panels per row.
_ = dat.calibration_plot(ncol=4)



In [17]:
# Calibrated traces for the example sample.
_ = dat.data[sample].tplot(ranges=True)


Filtering

Sample Info:

Zircon Si wt%: ~16

Contaminant Indicators:

  • Ca
  • La (< 320 ppb)
  • Ti (< 12 ppm)
  • Mg
  • Al

In [18]:
# List the analytes measured in this session.
dat.analytes


Out[18]:
array(['Li7', 'Mg24', 'Al27', 'Si29', 'P31', 'Ca43', 'Ti49', 'Y89',
       'La139', 'Ce140', 'Pr141', 'Nd146', 'Sm147', 'Eu153', 'Gd157',
       'Tb159', 'Dy163', 'Ho165', 'Er166', 'Tm169', 'Yb172', 'Lu175',
       'Hf177', 'Pb206', 'Th232', 'U238'], dtype='<U5')

In [19]:
# Subsets of analytes: contamination indicators (see the list above) and
# a few elements of scientific interest, used in the plots below.
contam = ['La139', 'Ti49', 'Mg24', 'Al27']
interest = ['Pb206', 'Hf177', 'Nd146']

In [20]:
# Traces of the contamination indicators for the example sample.
_ = dat.data[sample].tplot(contam, ranges=True)



In [21]:
# Traces of the elements of interest for the example sample.
_ = dat.data[sample].tplot(interest, ranges=True)


This shows us that some of the elements we're interested in (e.g. Nd) are influenced by contamination.

Data Overview


In [22]:
# Pairwise scatter of contaminant and interest elements, before filtering.
fig, axs = dat.crossplot(contam + interest, mode='scatter', filt=False)


Drawing Plots: 100%|██████████| 21/21 [00:00<00:00, 113.93it/s]

Thresholds


In [23]:
from latools.helpers.chemistry import to_molar_ratio

In [24]:
# Calculate molar-ratio thresholds from the contaminant limits above
# (La < 320 ppb, Ti < 12 ppm, in zircon with ~16 wt% Si), using atomic
# masses La 138.9, Ti 47.9, Si 28.09.
# NOTE(review): the trailing *10 and *5 relax the thresholds well above
# the nominal limits — presumably to reject only gross contamination;
# confirm against the intended cut-offs.
La_thresh = to_molar_ratio(320e-9, 16e-2, 138.9, 28.09) * 10
Ti_thresh = to_molar_ratio(12e-6, 16e-2, 47.9, 28.09) * 5

In [25]:
# Start from a clean slate, then flag points above/below the La threshold.
dat.filter_clear()
dat.filter_threshold('La139', La_thresh)


Threshold Filter: 100%|██████████| 28/28 [00:00<00:00, 13738.95it/s]

In [26]:
# filter_clear() is deliberately NOT repeated here, so the Ti filter is
# created alongside the existing La filter rather than replacing it.
dat.filter_threshold('Ti49', Ti_thresh)


Threshold Filter: 100%|██████████| 28/28 [00:00<00:00, 10492.32it/s]

In [27]:
# Activate the 'below threshold' filters (keep points under both limits);
# show_status=True prints the active-filter table once, on the second call.
dat.filter_on('Ti49_thresh_below', show_status=False)
dat.filter_on('La139_thresh_below', show_status=True)


Subset: All Samples

n  Filter Name           Li7    Mg24   Al27   Si29   P31    Ca43   Ti49   Y89    La139  Ce140  Pr141  Nd146  Sm147  Eu153  Gd157  Tb159  Dy163  Ho165  Er166  Tm169  Yb172  Lu175  Hf177  Pb206  Th232  U238   
0  La139_thresh_below    True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   
1  La139_thresh_above    False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
2  Ti49_thresh_below     True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   
3  Ti49_thresh_above     False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  


In [28]:
# Summarise how many points the active filters remove in each sample.
_ = dat.filter_nremoved()


Sample     tot flt %rm 
117          63  23  63
119          60  52  13
120          63  51  19
121          61  42  31
126          64   0 100
Temora-5     65  65   0
127          63  63   0
128          62  62   0
131          64  64   0
134          66  59  11
135          64  64   0
Temora-6     63  63   0
139          63  60   5
140          65  65   0
141          62  62   0
143          64  55  14
144          62  55  11
146          61  53  13
147          63  63   0
148          65  65   0
149          64  13  80
150          64  58   9
151          62  62   0
152          61  61   0
153          65  61   6
154          64  64   0
155          63  63   0
157          63  63   0

In [29]:
# Re-draw the crossplot with the threshold filters applied.
fig, axs = dat.crossplot(contam + interest, mode='scatter', filt=True)


Drawing Plots: 100%|██████████| 21/21 [00:00<00:00, 111.08it/s]

In [30]:
# Contaminant traces for the example sample with the filters applied.
_ = dat.data[sample].tplot(contam, filt=True)


This has done a pretty good job of removing the bad data at the end, but the contamination at the start is still included.

'Filter Defragmentation'


In [31]:
# Exclude contiguous filter-passing regions shorter than 4 points.
dat.filter_defragment(4, 'exclude')

In [32]:
# Switch from the raw threshold filters to the defragmented version.
dat.filter_off()
dat.filter_on('defrag')

Signal Optimisation

Signal optimisation selects the longest contiguous data region in which the means and standard deviations of the target elements are minimised.


In [33]:
# Per sample, find the longest contiguous region minimising the mean and
# std of the contaminant indicators; threshold_mode and x_bias control
# the length/quality trade-off (see latools docs).
dat.optimise_signal(['Ti49', 'La139', 'Al27'], threshold_mode='kde_first_max', x_bias=0.2)


Optimising Data selection: 100%|██████████| 28/28 [00:02<00:00, 10.19it/s]
A Few Problems:
126_1: Optmisation failed. No contiguous data regions longer than 5 points.

  *** Check Optimisation Plots ***


In [34]:
# Visualise the optimisation result for every sample (sample 126 failed above).
dat.optimisation_plots(filt=True)


Drawing Plots: 100%|██████████| 28/28 [00:15<00:00,  1.68it/s]

In [35]:
# Use only the optimisation filter from here on.
dat.filter_off()
dat.filter_on('optim', show_status=True)


Subset: All Samples

n  Filter Name                 Li7    Mg24   Al27   Si29   P31    Ca43   Ti49   Y89    La139  Ce140  Pr141  Nd146  Sm147  Eu153  Gd157  Tb159  Dy163  Ho165  Er166  Tm169  Yb172  Lu175  Hf177  Pb206  Th232  U238   
0  La139_thresh_below          False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
1  La139_thresh_above          False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
2  Ti49_thresh_below           False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
3  Ti49_thresh_above           False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
4  defrag_exclude_4            False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  False  
5  optimise_Ti49_La139_Al27    True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   True   


In [36]:
# Contaminant traces with the optimisation filter applied.
_ = dat.data[sample].tplot(contam, filt=True)


Done a pretty good job!

Be conservative: Filter Trimming


In [37]:
# Trim one point from each edge of the selected regions, to be conservative.
dat.filter_trim(start=1, end=1)

In [38]:
# Activate the trimmed filter in place of the untrimmed one.
dat.filter_off()
dat.filter_on('trim')

Effect of all filtering


In [39]:
# Final filtered crossplot of the contamination indicators.
fig, axs = dat.crossplot(contam, mode='scatter', filt=True)


Drawing Plots: 100%|██████████| 6/6 [00:00<00:00, 161.81it/s]

In [40]:
# Optional alternative view (left disabled; kept for reference):
# f, a = dat.gradient_crossplot(contam, mode='scatter', win=5, filt=True)

Save minimal export

This produces a .zip file containing all the raw data, the SRM values you used, and a record of everything you did to the data.

The zip file can be opened with latools.reproduce to recreate your entire analysis.


In [41]:
# Save raw data + SRM values + processing log; latools.reproduce can
# rebuild the whole analysis from this zip file.
dat.minimal_export(path='raw_data/zircon_export/minimal_export.zip')

Comparison to Reference Data


In [42]:
import re

import numpy as np
import pandas as pd

from latools.helpers.chemistry import elements, calc_M, to_mass_fraction

Load Reference Data


In [43]:
# Reference values for the same zircons (see Burnham & Berry 2016).
rd = helpers.load_reference_data('zircon_reference')

In [44]:
# Drop the replicate index level so it matches the latools stats below.
rd.index = rd.index.droplevel('rep')

Get LAtools Stats


In [45]:
# Compute per-sample summary statistics over the filtered data.
# 'H15' stats are Huber 'robust' statistics, which recursively remove outliers
dat.sample_stats(stats=['H15_mean', 'H15_se'])


Calculating Stats: 100%|██████████| 34/34 [00:07<00:00,  4.57it/s]

In [46]:
# Collect the computed statistics into a DataFrame.
las = dat.getstats()

In [47]:
# Remove 'replicate' from the index, mirroring the reference data above.
las.index = las.index.droplevel('rep')

In [48]:
# Convert measured Pb206 to total lead using the natural isotopic
# abundance of 206Pb (els.loc[206, 'percent'], in %).
# NOTE(review): `x /= (100 / percent)` multiplies by the fractional
# abundance, whereas converting an isotope amount to a total-element
# amount normally divides by it — confirm the intended direction against
# how latools calibrates Pb206 and how the reference data reports Pb.
els = elements().set_index('isotope')

las.loc[:,'Pb206'] /= (100 / els.loc[206, 'percent'])

Convert Molar Ratio data to ppm


In [49]:
# Rename columns from isotope labels to element names ('Pb206' -> 'Pb').
# [A-Za-z] rather than [A-z]: the latter also matches the ASCII
# characters [ \ ] ^ _ ` between 'Z' and 'a' — a classic regex bug.
ename = re.compile('[A-Za-z]+')
las.columns = [ename.match(c).group() for c in las.columns]

In [50]:
# Load average atomic masses, one entry per element.
# NOTE(review): this rebinds `els`, which previously held the per-isotope
# table — the two objects have different structures.
els = elements(all_isotopes=False)

In [51]:
# Sanity check: average atomic mass of Si (~28.085).
els['Si']


Out[51]:
28.08541284261777

In [52]:
# Calculate wt% Si from the reference data's wt% SiO2, scaling the mass
# fraction by M_Si / M_SiO2.
rd.loc[:, 'wtpcSi'] = rd.loc[:, 'SiO2'] * els['Si'] / calc_M('SiO2')

In [53]:
# Attach wt% Si to the latools data; the inner join keeps only samples
# present in both datasets.
la_ppm = las.join(rd.loc[:, 'wtpcSi'], how='inner')

In [54]:
# Convert each molar ratio (analyte/Si) to a mass fraction, using the
# sample's Si mass fraction (wtpcSi / 100) and the atomic masses.
for c in las.columns:
    la_ppm.loc[:, c] = to_mass_fraction(las.loc[:, c], la_ppm.loc[:, 'wtpcSi'] * 1e-2, 
                                        els[ename.match(c).group()], els['Si'])

In [55]:
# Convert Al to 'wt% Al2'. NOTE(review): the factor of 2 presumably
# matches a two-Al-atoms-per-formula-unit reporting convention in the
# reference data, and 1e-4 rescales so the later *1e6 yields wt% rather
# than ppm for this column — confirm against the reference data's units.
la_ppm.loc[:, 'Al'] *= (2 * 1e-4)

In [56]:
# Isolate the robust means and pair each with its standard error
# (columns suffixed '_err'); *1e6 converts mass fractions to ppm.
lad = la_ppm.loc['H15_mean',:].join(la_ppm.loc['H15_se', :], rsuffix='_err') * 1e6  # convert to ppm

In [57]:
# Combine latools ('_la') and reference ('_rd') values side by side.
comp = lad.join(rd, lsuffix='_la', rsuffix='_rd')

Comparison Plots


In [58]:
from comparison_tools.plots import get_panel_bounds

In [59]:
# Bland-Altman comparison of latools vs. reference values.
fig, axs = plots_zircon.bland_altman_plots(comp)



In [60]:
# Save the comparison figure in vector (PDF) and raster (PNG) form.
fig.savefig('Figures/zircon_comparison.pdf')
fig.savefig('Figures/zircon_comparison.png', dpi=200)

In [61]:
# Summary statistics of the latools vs. reference comparison.
stat = stats_zircon.comparison_stats(comp)

In [62]:
# Write the comparison statistics to CSV.
stat.to_csv('Stats/zircon_stats.csv')