In [1]:
import re
import pandas as pd

In [2]:
gd = pd.read_csv('rawdata/GeoRem_Preferred_170622.csv', comment='#')

In [3]:
els = pd.read_pickle('resources/elements.pkl')

In [ ]:


In [4]:
def calc_M(molecule):
    """
    Returns molecular mass of molecule.
    
    Where molecule is in standard chemical notation,
    e.g. 'CO2' or 'HCO3'
    
    NOTE: Brackets not supported - i.e. B(OH)4 must be
    written as BO4H4
    """
    # break the molecule up into a list of (Element, N) pairs
    comp = re.findall('([A-Z][a-z]{0,})([0-9]{0,})',
                      molecule)
    
    # Calculate their mass
    M = 0
    for el, n in comp:
        if n == '':
            n = 1
        else:
            n = float(n)
        m = els[el]
        
        M += m * n
    return M

In [5]:
# calculate molecular weight for all compounds
gd.loc[:,'M'] = gd.Item.apply(calc_M)

In [6]:
udict = {'%m/m': 100,
         'ug/g': 1e6}

In [7]:
gd.loc[:,'g/g'] = gd.Value / [udict[u] for u in gd.Unit]
gd.loc[:,'g/g_err'] = gd.Uncertainty / [udict[u] for u in gd.Unit]

In [8]:
gd.loc[:,'mol/g'] = gd.loc[:,'g/g'] / gd.loc[:,'M']
gd.loc[:,'mol/g_err'] = gd.loc[:,'g/g_err'] / gd.loc[:,'M']

In [9]:
gd.to_csv('GeoRem_Preferred_170622.csv', index=False)

In [153]:
srmdat = gd.copy()

Create Ratios in latools


In [154]:
internal_standard = 'Si29'

In [155]:
internal_el = re.match('([A-Z][a-z]{0,})',internal_standard).groups()[0]

In [157]:
denom


Out[157]:
Item SRM Value Uncertainty Uncertainty_Type Unit GeoReM_bibcode Reference M g/g mol/g g/g_err mol/g_err
4 SiO2 NIST610 69.7 0.5 95%CL %m/m GeoReM 5211 Jochum et al 2011 60.0843 0.697 0.0116 0.005 0.000083

In [161]:
for srm in srmdat.SRM.unique():
    ind = srmdat.SRM == srm
    
    # find denominator
    denom = srmdat.loc[srmdat.Item.str.contains(internal_el) & ind]
    
    # calculate denominator composition
    comp = re.findall('([A-Z][a-z]{0,})([0-9]{0,})',
                      denom.Item.values[0])
    # determine stoichiometric multiplier
    N = [n for el, n in comp if el == internal_el][0]
    if N == '':
        N = 1
    else:
        N = float(N)
    
    srmdat.loc[ind, 'mol_ratio'] = srmdat.loc[ind, 'mol/g'] / (denom['mol/g'].values * N)
    srmdat.loc[ind, 'mol_ratio_err'] = ((srmdat.loc[ind, 'mol/g_err'] / srmdat.loc[ind, 'mol/g'])**2 +
                                        (denom['mol/g_err'].values / denom['mol/g'].values))**0.5 * srmdat.loc[ind, 'mol_ratio']
#     srmdat.loc[ind]

In [162]:
srmdat


Out[162]:
Item SRM Value Uncertainty Uncertainty_Type Unit GeoReM_bibcode Reference M g/g mol/g g/g_err mol/g_err mol_ratio mol_ratio_err
0 CO2 NIST610 0.0002 NaN NaN %m/m GeoReM 5211 Jochum et al 2011 44.009500 2.000000e-06 4.544473e-08 NaN NaN 3.917525e-06 NaN
1 H2O NIST610 0.0130 NaN NaN %m/m GeoReM 5211 Jochum et al 2011 18.015280 1.300000e-04 7.216097e-06 NaN NaN 6.220575e-04 NaN
2 Na2O NIST610 13.4000 0.300 95%CL %m/m GeoReM 5211 Jochum et al 2011 61.978940 1.340000e-01 2.162025e-03 3.000000e-03 4.840354e-05 1.863755e-01 1.632763e-02
3 Al2O3 NIST610 1.9500 0.040 95%CL %m/m GeoReM 5211 Jochum et al 2011 101.961276 1.950000e-02 1.912491e-04 4.000000e-04 3.923058e-06 1.648647e-02 1.436725e-03
4 SiO2 NIST610 69.7000 0.500 95%CL %m/m GeoReM 5211 Jochum et al 2011 60.084300 6.970000e-01 1.160037e-02 5.000000e-03 8.321641e-05 1.000000e+00 8.500036e-02
5 CaO NIST610 11.4000 0.200 95%CL %m/m GeoReM 5211 Jochum et al 2011 56.077400 1.140000e-01 2.032905e-03 2.000000e-03 3.566499e-05 1.752448e-01 1.515780e-02
6 H NIST610 15.0000 NaN NaN ug/g GeoReM 5211 Jochum et al 2011 1.007940 1.500000e-05 1.488184e-05 NaN NaN 1.282876e-03 NaN
7 Li NIST610 468.0000 24.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 6.941000 4.680000e-04 6.742544e-05 2.400000e-05 3.457715e-06 5.812354e-03 5.754949e-04
8 Be NIST610 476.0000 31.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 9.012182 4.760000e-04 5.281740e-05 3.100000e-05 3.439788e-06 4.553079e-03 4.864557e-04
9 B NIST610 350.0000 56.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 10.811000 3.500000e-04 3.237443e-05 5.600000e-05 5.179909e-06 2.790811e-03 5.052339e-04
10 F NIST610 304.0000 NaN NaN ug/g GeoReM 5211 Jochum et al 2011 18.998403 3.040000e-04 1.600134e-05 NaN NaN 1.379382e-03 NaN
11 Mg NIST610 432.0000 29.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 24.305000 4.320000e-04 1.777412e-05 2.900000e-05 1.193170e-06 1.532203e-03 1.655913e-04
12 P NIST610 413.0000 46.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 30.973761 4.130000e-04 1.333387e-05 4.600000e-05 1.485128e-06 1.149435e-03 1.608352e-04
13 S NIST610 575.0000 32.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 32.065000 5.750000e-04 1.793232e-05 3.200000e-05 9.979729e-07 1.545841e-03 1.566629e-04
14 Cl NIST610 274.0000 67.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 35.453000 2.740000e-04 7.728542e-06 6.700000e-05 1.889826e-06 6.662325e-04 1.724067e-04
15 K NIST610 464.0000 21.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 39.098300 4.640000e-04 1.186752e-05 2.100000e-05 5.371078e-07 1.023030e-03 9.824254e-05
16 Sc NIST610 455.0000 10.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 44.955910 4.550000e-04 1.012103e-05 1.000000e-05 2.224402e-07 8.724747e-04 7.634345e-05
17 Ti NIST610 452.0000 10.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 47.867000 4.520000e-04 9.442831e-06 1.000000e-05 2.089122e-07 8.140113e-04 7.125770e-05
18 V NIST610 450.0000 9.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 50.941500 4.500000e-04 8.833662e-06 9.000000e-06 1.766732e-07 7.614984e-04 6.627050e-05
19 Cr NIST610 408.0000 10.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 51.996100 4.080000e-04 7.846742e-06 1.000000e-05 1.923221e-07 6.764218e-04 5.964158e-05
20 Mn NIST610 444.0000 13.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 54.938049 4.440000e-04 8.081830e-06 1.300000e-05 2.366302e-07 6.966874e-04 6.243376e-05
21 Fe NIST610 458.0000 9.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 55.845000 4.580000e-04 8.201271e-06 9.000000e-06 1.611604e-07 7.069837e-04 6.146999e-05
22 Co NIST610 410.0000 10.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 58.933200 4.100000e-04 6.957029e-06 1.000000e-05 1.696836e-07 5.997249e-04 5.285916e-05
23 Ni NIST610 458.7000 4.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 58.693400 4.587000e-04 7.815189e-06 4.000000e-06 6.815076e-08 6.737018e-04 5.736223e-05
24 Cu NIST610 441.0000 15.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 63.546000 4.410000e-04 6.939855e-06 1.500000e-05 2.360495e-07 5.982443e-04 5.460279e-05
25 Zn NIST610 460.0000 18.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 65.409000 4.600000e-04 7.032671e-06 1.800000e-05 2.751915e-07 6.062455e-04 5.656239e-05
26 Ga NIST610 433.0000 13.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 69.723000 4.330000e-04 6.210289e-06 1.300000e-05 1.864521e-07 5.353528e-04 4.810730e-05
27 Ge NIST610 447.0000 78.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 72.640000 4.470000e-04 6.153634e-06 7.800000e-05 1.073789e-06 5.304689e-04 1.028928e-04
28 As NIST610 325.0000 18.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 74.921600 3.250000e-04 4.337868e-06 1.800000e-05 2.402511e-07 3.739423e-04 3.784225e-05
29 Se NIST610 138.0000 42.000 95%CL ug/g GeoReM 5211 Jochum et al 2011 78.960000 1.380000e-04 1.747720e-06 4.200000e-05 5.319149e-07 1.506608e-04 4.759574e-05
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
181 Sn NIST614 1.6800 0.150 95%CL ug/g GeoReM 5211 Jochum et al 2011 118.710000 1.680000e-06 1.415214e-08 1.500000e-07 1.263584e-09 1.179364e-06 1.686721e-07
182 Sb NIST614 0.7900 0.064 95%CL ug/g GeoReM 5211 Jochum et al 2011 121.760000 7.900000e-07 6.488173e-09 6.400000e-08 5.256242e-10 5.406898e-07 7.461855e-08
183 Cs NIST614 0.6640 0.034 95%CL ug/g GeoReM 5211 Jochum et al 2011 132.905450 6.640000e-07 4.996033e-09 3.400000e-08 2.558210e-10 4.163428e-07 5.116884e-08
184 Ba NIST614 3.2000 0.090 95%CL ug/g GeoReM 5211 Jochum et al 2011 137.327000 3.200000e-06 2.330205e-08 9.000000e-08 6.553700e-10 1.941868e-06 2.237255e-07
185 La NIST614 0.7200 0.013 95%CL ug/g GeoReM 5211 Jochum et al 2011 138.905500 7.200000e-07 5.183380e-09 1.300000e-08 9.358881e-11 4.319553e-07 4.888670e-08
186 Ce NIST614 0.8130 0.025 95%CL ug/g GeoReM 5211 Jochum et al 2011 140.116000 8.130000e-07 5.802335e-09 2.500000e-08 1.784236e-10 4.835357e-07 5.603226e-08
187 Pr NIST614 0.7680 0.015 95%CL ug/g GeoReM 5211 Jochum et al 2011 140.907650 7.680000e-07 5.450378e-09 1.500000e-08 1.064527e-10 4.542055e-07 5.151606e-08
188 Nd NIST614 0.7520 0.014 95%CL ug/g GeoReM 5211 Jochum et al 2011 144.240000 7.520000e-07 5.213533e-09 1.400000e-08 9.706045e-11 4.344681e-07 4.921059e-08
189 Sm NIST614 0.7540 0.013 95%CL ug/g GeoReM 5211 Jochum et al 2011 150.360000 7.540000e-07 5.014632e-09 1.300000e-08 8.645916e-11 4.178927e-07 4.724208e-08
190 Eu NIST614 0.7700 0.016 95%CL ug/g GeoReM 5211 Jochum et al 2011 151.964000 7.700000e-07 5.066990e-09 1.600000e-08 1.052881e-10 4.222559e-07 4.798589e-08
191 Gd NIST614 0.7630 0.021 95%CL ug/g GeoReM 5211 Jochum et al 2011 157.250000 7.630000e-07 4.852146e-09 2.100000e-08 1.335453e-10 4.043520e-07 4.652715e-08
192 Tb NIST614 0.7390 0.020 95%CL ug/g GeoReM 5211 Jochum et al 2011 158.925340 7.390000e-07 4.649982e-09 2.000000e-08 1.258453e-10 3.875047e-07 4.454637e-08
193 Dy NIST614 0.7460 0.022 95%CL ug/g GeoReM 5211 Jochum et al 2011 162.500000 7.460000e-07 4.590769e-09 2.200000e-08 1.353846e-10 3.825703e-07 4.420692e-08
194 Ho NIST614 0.7490 0.015 95%CL ug/g GeoReM 5211 Jochum et al 2011 164.930320 7.490000e-07 4.541312e-09 1.500000e-08 9.094750e-11 3.784487e-07 4.295640e-08
195 Er NIST614 0.7400 0.017 95%CL ug/g GeoReM 5211 Jochum et al 2011 167.259000 7.400000e-07 4.424276e-09 1.700000e-08 1.016388e-10 3.686956e-07 4.205461e-08
196 Tm NIST614 0.7320 0.020 95%CL ug/g GeoReM 5211 Jochum et al 2011 168.934210 7.320000e-07 4.333048e-09 2.000000e-08 1.183893e-10 3.610931e-07 4.153227e-08
197 Yb NIST614 0.7770 0.021 95%CL ug/g GeoReM 5211 Jochum et al 2011 173.040000 7.770000e-07 4.490291e-09 2.100000e-08 1.213592e-10 3.741970e-07 4.301332e-08
198 Lu NIST614 0.7320 0.018 95%CL ug/g GeoReM 5211 Jochum et al 2011 174.967000 7.320000e-07 4.183646e-09 1.800000e-08 1.028765e-10 3.486428e-07 3.988470e-08
199 Hf NIST614 0.7110 0.022 95%CL ug/g GeoReM 5211 Jochum et al 2011 178.490000 7.110000e-07 3.983416e-09 2.200000e-08 1.232562e-10 3.319567e-07 3.848421e-08
200 Ta NIST614 0.8080 0.026 95%CL ug/g GeoReM 5211 Jochum et al 2011 180.947900 8.080000e-07 4.465374e-09 2.600000e-08 1.436878e-10 3.721205e-07 4.326547e-08
201 W NIST614 0.8060 0.071 95%CL ug/g GeoReM 5211 Jochum et al 2011 183.840000 8.060000e-07 4.384247e-09 7.100000e-08 3.862054e-10 3.653598e-07 5.198185e-08
202 Re NIST614 0.1700 0.008 95%CL ug/g GeoReM 5211 Jochum et al 2011 186.207000 1.700000e-07 9.129625e-10 8.000000e-09 4.296294e-11 7.608143e-08 9.223503e-09
203 Ir NIST614 0.0020 NaN NaN ug/g GeoReM 5211 Jochum et al 2011 192.217000 2.000000e-09 1.040491e-11 NaN NaN 8.670895e-10 NaN
204 Pt NIST614 2.3600 0.120 95%CL ug/g GeoReM 5211 Jochum et al 2011 195.078000 2.360000e-06 1.209773e-08 1.200000e-07 6.151386e-10 1.008160e-06 1.237540e-07
205 Au NIST614 0.4800 0.070 95%CL ug/g GeoReM 5211 Jochum et al 2011 196.966550 4.800000e-07 2.436962e-09 7.000000e-08 3.553903e-10 2.030834e-07 3.730882e-08
206 Tl NIST614 0.2730 0.020 95%CL ug/g GeoReM 5211 Jochum et al 2011 204.383300 2.730000e-07 1.335726e-09 2.000000e-08 9.785535e-11 1.113123e-07 1.487163e-08
207 Pb NIST614 2.3200 0.040 95%CL ug/g GeoReM 5211 Jochum et al 2011 207.200000 2.320000e-06 1.119691e-08 4.000000e-08 1.930502e-10 9.330909e-07 1.054844e-07
208 Bi NIST614 0.5810 0.043 95%CL ug/g GeoReM 5211 Jochum et al 2011 208.980380 5.810000e-07 2.780165e-09 4.300000e-08 2.057609e-10 2.316842e-07 3.104930e-08
209 Th NIST614 0.7480 0.006 95%CL ug/g GeoReM 5211 Jochum et al 2011 232.038100 7.480000e-07 3.223609e-09 6.000000e-09 2.585782e-11 2.686384e-07 3.009110e-08
210 U NIST614 0.8230 0.002 95%CL ug/g GeoReM 5211 Jochum et al 2011 238.028910 8.230000e-07 3.457563e-09 2.000000e-09 8.402341e-12 2.881349e-07 3.219973e-08

211 rows × 15 columns


In [134]:
N


Out[134]:
1

In [128]:
re.findall('([A-Z][a-z]{0,})([0-9]{0,})',
                      denom.Item.values[0])


Out[128]:
[('Ca', ''), ('O', '')]

In [127]:



Out[127]:
'CaO'

In [107]:
denom =

In [108]:
denom


Out[108]:
Item SRM Value Uncertainty Uncertainty_Type Unit GeoReM_bibcode Reference M g/g mol/g
5 CaO NIST610 11.4 0.2 95%CL %m/m GeoReM 5211 Jochum et al 2011 56.0774 0.114 0.002033
76 CaO NIST612 11.9 0.1 95%CL %m/m GeoReM 5211 Jochum et al 2011 56.0774 0.119 0.002122
146 CaO NIST614 11.9 0.2 95%CL %m/m GeoReM 5211 Jochum et al 2011 56.0774 0.119 0.002122

In [105]:
denom


Out[105]:
Item SRM Value Uncertainty Uncertainty_Type Unit GeoReM_bibcode Reference M g/g mol/g
5 CaO NIST610 11.4 0.2 95%CL %m/m GeoReM 5211 Jochum et al 2011 56.0774 0.114 0.002033
76 CaO NIST612 11.9 0.1 95%CL %m/m GeoReM 5211 Jochum et al 2011 56.0774 0.119 0.002122
146 CaO NIST614 11.9 0.2 95%CL %m/m GeoReM 5211 Jochum et al 2011 56.0774 0.119 0.002122