In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import latools as la
from IPython.display import HTML
from comparison_tools import helpers, stats_1sample, plots_1sample
%matplotlib inline
In [2]:
# display the HTML table of instrument / processing parameters for this dataset
HTML(filename="./Parameter_Tables/iolite_data.html")
Out[2]:
In [3]:
# define data format description so it can be imported by latools
# - genfromtext_args: presumably passed through to np.genfromtxt — data is
#   comma-delimited, with 15 header rows to skip
# - column_id: analyte column names are on row 13; column 0 is the time axis;
#   analyte names are extracted with the pattern (e.g. '43Ca', '27Al')
# - meta_regex: pulls 'name' and 'date' from line 0 of each file
#   (format 'key:value;' — see the regex groups)
dataformat = {'genfromtext_args': {'delimiter': ',',
'skip_header': 15},
'column_id': {'name_row': 13,
'delimiter': ',',
'timecolumn': 0,
'pattern': '([0-9]{1,3}[A-z]{1,2})'},
'meta_regex': {0: (['name', 'date'],
'([A-z0-9-]+):([0-9/ :AMP]+);')}
}
In [4]:
# load all raw data files; 43Ca is the internal standard, files matching
# 'NIST610' are treated as SRM standards, and sample names are taken from
# the file metadata (parsed by meta_regex above)
dat = la.analyse('raw_data/iolite_data', internal_standard='43Ca', srm_identifier='NIST610',
dataformat=dataformat, names='metadata_names')
In [5]:
# example sample used for the single-sample plots throughout this notebook
sample = '1308H1-1e'
In [6]:
dat.data[sample].tplot() # view raw data
Out[6]:
In [7]:
# despiking: remove short noise spikes using a 3-point window
dat.despike(noise_despiker=True, win=3)
In [8]:
dat.data[sample].tplot() # view despiked data
Out[8]:
In [9]:
# identify signal (laser-on) and background (laser-off) regions in each file;
# on_mult / off_mult expand the excluded transition regions around the
# on/off boundaries (semantics per latools docs — verify)
dat.autorange(on_mult=[3,.6], off_mult=[.5,3])
In [10]:
dat.data[sample].tplot(ranges=True) # view autorange info
Out[10]:
In [11]:
# calculate a smooth session-wide background (weighted mean, weight_fwhm=1000);
# bkg_filter excludes anomalous background regions — see latools docs
dat.bkg_calc_weightedmean(weight_fwhm=1000, bkg_filter=True)
In [12]:
fig, ax = dat.bkg_plot()
In [13]:
# subtract the calculated background from the signal regions
dat.bkg_subtract()
In [14]:
dat.data[sample].tplot(ranges=True) # view background subtracted data
Out[14]:
In [15]:
# express all analytes as ratios to the 43Ca internal standard
dat.ratio('43Ca')
In [16]:
dat.data[sample].tplot(ranges=True) # view ratio data
Out[16]:
In [17]:
# calibrate ratios to concentrations using the NIST610 SRM measurements
dat.calibrate(srms_used=['NIST610'])
In [18]:
_ = dat.calibration_plot()
In [19]:
# remove any pre-existing filters before constructing new ones below
dat.filter_clear()
In [20]:
_ = dat.crossplot()
Crossplot shows two clear contaminant clusters; both are likely clays, and can be removed with clustering filters.
In [21]:
# split all data (level='population') into 2 k-means clusters in Mn-Fe space
# and in Al-Zn space, to separate the contaminant clusters seen in the crossplot
dat.filter_clustering(['55Mn', '57Fe'], method='kmeans', n_clusters=2, level='population')
dat.filter_clustering(['27Al', '66Zn'], method='kmeans', n_clusters=2, level='population')
In [22]:
# keep cluster 0 of each clustering filter (presumably the low-contaminant
# cluster — confirm against the printed filter status)
dat.filter_on('_0', show_status=True)
In [23]:
_ = dat.crossplot(filt=True)
There are still a lot of points elevated in Al, Mn, Fe and Zn, which tend to be associated with clay contaminants.
If associated with contaminants, they also likely have non-static signals. Let's have a look at the gradient crossplot for these elements.
In [24]:
# signal gradients (5-point window) for the contaminant-associated elements
_ = dat.gradient_crossplot(['27Al', '55Mn', '57Fe', '66Zn'], win=5, filt=True)
Al and Fe seem to have some highly variable bits - remove the 5% most variable bits with a gradient threshold filter.
In [25]:
# keep only data whose signal gradient lies inside the given percentile range
# (i.e. exclude the most rapidly-changing portions of each signal)
dat.filter_gradient_threshold_percentile('27Al', [2.5, 92.5], win=5, filt=True)
In [26]:
dat.filter_gradient_threshold_percentile('57Fe', [2.5, 97.5], win=5, filt=True)
In [27]:
dat.filter_gradient_threshold_percentile('66Zn', [2.5, 97.5], win=5, filt=True)
In [28]:
# dat.filter_gradient_threshold_percentile('11B', [2.5, 97.5], win=5, filt=True)
In [29]:
# NOTE(review): the filter names below do not match the filters created above —
# 27Al was created with [2.5, 92.5] and 57Fe with [2.5, 97.5] (not '2.5-95.0'),
# the 66Zn filter is never switched on, and the 11B gradient filter is
# commented out above so '11B_..._inside' may not exist. Verify these names
# against the filter status output before relying on the results.
dat.filter_on('27Al_2.5-95.0-grd-pcnt_inside')
dat.filter_on('57Fe_2.5-95.0-grd-pcnt_inside')
dat.filter_on('11B_2.5-97.5-grd-pcnt_inside', show_status=True)
In [30]:
# view gradients of the filtered data
fig, axs = dat.gradient_crossplot(filt=True, mode='scatter', win=5)
Everything looks a lot cleaner now... what about the concentrations?
In [31]:
# concentrations of the filtered data, as a scatter crossplot
fig, axs = dat.crossplot(filt=True, mode='scatter')
There are a few wild outliers - remove them using concentration thresholds.
In [32]:
# concentration-threshold filters to remove the remaining outliers
dat.filter_threshold('27Al', 0.5e-3) # this will remove quite a few data points, but is a very high Al/Ca for forams
dat.filter_threshold('66Zn', 150e-6) # |
dat.filter_threshold('63Cu', 100e-6) # | - to remove v high values
dat.filter_threshold('55Mn', 90e-6) # |
dat.filter_threshold('11B', 500e-6) # To remove a few flyers
In [33]:
# NOTE(review): filters are switched on here by numeric index; these magic
# numbers depend on exactly which filters were created above — confirm from
# the printed filter status that 10/12/14/16/18 are the intended 'below'
# sides of the five threshold filters.
dat.filter_on([10, 12, 14, 16, 18], show_status=True)
In [34]:
# a last look at the data
_ = dat.crossplot(filt=True)
In [35]:
# and should look at all the traces individually
# dat.trace_plots(filt=True)
In [36]:
dat.data['1308H1-10c'].tplot(filt=True) # view filtered data for one sample
Out[36]:
In [37]:
# calculate the mean of the filtered data for each sample
dat.sample_stats(stats=['mean'])
In [38]:
# collect stats into a DataFrame; * 1e3 converts mol/mol to mmol/mol
ld = dat.getstats() * 1e3
In [39]:
# export a minimal, reproducible record of the analysis to a zip archive
dat.minimal_export(path='raw_data/iolite_data_export/minimal_export.zip')
In [40]:
import re
import string
In [41]:
# ld = pd.read_csv('Cleaning test/RUN1_export/stat_export.csv', index_col=[0,1,2])
# ld *= 1e3 # convert to mmol/mol
# keep only the 'mean' statistic, drop analytes with no data, and move the
# index into columns so the sample labels can be parsed below
ld = ld.loc['mean', :].dropna().reset_index()
# extract sample and replicate names
# labels look like '1308H1-1e': group 1 = sample ID, group 2 = number,
# optional group 3 = replicate letter
mtch = re.compile('([A-Z0-9]+)[_-]([0-9]+)([a-z]?)')
for i, s in ld['sample'].items():  # .iteritems() was removed in pandas 2.x
    gs = mtch.match(s).groups()
    if gs[-1] == '':
        # no replicate letter: the trailing number is the replicate
        r = gs[1]
        s = gs[0]
    else:
        # replicate letter present: convert it to a 0-based index ('a' -> 0)
        r = string.ascii_lowercase.index(gs[-1])
        s = gs[0] + '_' + gs[1]
    ld.loc[i, 'rep'] = r
    ld.loc[i, 'sample'] = s
ld.set_index(['sample', 'rep'], inplace=True)
In [42]:
# load the independently-processed (iolite) reference values for comparison
rd = helpers.load_reference_data('iolite_reference')
In [43]:
# combine reference and latools results on their shared (sample, rep) index
df = rd.join(ld)
In [44]:
# number of unique samples in the reference data
np.unique(rd.index.levels[0]).size
Out[44]:
In [45]:
# pairwise reproducibility of replicate analyses in the latools results
_ = stats_1sample.pairwise_reproducibility(ld, plot=True)
In [46]:
# same for the reference data; keep the distributions and summary stats
_, rep_dists, rep_stats, _ = stats_1sample.pairwise_reproducibility(rd, plot=True)
In [47]:
# Assemble the reproducibility stats into a table: one row per element
# ratio (in plotting order), with 50th / 95th percentile columns.
rep = (pd.DataFrame(rep_stats)
       .T
       .set_axis(['50%', '95%'], axis=1)
       .reindex(['Mg/Ca', 'Sr/Ca', 'Ba/Ca', 'Al/Ca', 'Mn/Ca',
                 'Cu/Ca', 'Fe/Ca', 'Zn/Ca', 'B/Ca']))
In [48]:
# save reproducibility quantiles for use elsewhere
rep.to_csv('reproducibility_quants.csv')
In [49]:
# optional diagnostic plots of the latools-vs-reference comparison
# _ = plots_1sample.comparison_plots(df)
In [50]:
# _ = plots_1sample.residual_plots(df)
In [51]:
# NOTE(review): plots_1sample is already imported in the first cell; this
# re-import is redundant (harmless here given %autoreload)
from comparison_tools import plots_1sample
In [52]:
# Bland-Altman plots comparing latools and reference values, with the
# reproducibility stats overlaid
fig, axs = plots_1sample.bland_altman_plots(df, rep_stats)
In [53]:
# save the comparison figure in vector and raster formats
fig.savefig('Figures/iolite_comparison.pdf')
fig.savefig('Figures/iolite_comparison.png', dpi=200)
In [54]:
# summary statistics of the latools-vs-reference comparison
stat = stats_1sample.comparison_stats(df)
In [55]:
stat.to_csv('Stats/iolite_stats.csv')