Optimise multiplet parameters using the standards


In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings("ignore") # suppress annoying warnings

import sys
sys.path.append('/home/rstudio/codes')

from glob import glob
import os
from collections import defaultdict
import re

from IPython.display import display, HTML
import seaborn as sns

from pyBatman import PyBatmanPipeline, sub_dir_path

1. Set up the pipeline


In [2]:
# Directory holding the background spectra; expanded with sub_dir_path() before
# being handed to the pipeline.
background_dir = '/home/rstudio/NMR/calibrations/background'
# NOTE(review): not referenced again in the visible notebook. The pipeline
# constructor receives the literal '.' as a positional argument, presumably the
# output directory — TODO confirm whether this variable is still needed.
output_dir = None
# Metabolite database CSV passed to PyBatmanPipeline.
database_file = '/home/rstudio/codes/databases/default_db.csv'

In [3]:
# Turn the background directory into the input expected by the pipeline
# (presumably one path per sub-directory, going by the helper's name — TODO confirm).
input_backgrounds = sub_dir_path(background_dir)
# Positional arguments: background inputs, the string 'cpmg', the string '.',
# and the database CSV.
# NOTE(review): 'cpmg' presumably selects the pulse program and '.' the
# working/output directory — verify against PyBatmanPipeline's signature.
pipeline = PyBatmanPipeline(input_backgrounds, 'cpmg', '.', database_file)

In [4]:
# Show the TSP range held by the pipeline. The parenthesised single-argument
# form prints the same text under Python 2 (statement) and Python 3 (function).
print(pipeline.tsp_range)


(-0.05, 0.05)

2. Load spiked metabolites


In [5]:
# Map every database entry name to the lower-cased text that precedes its first
# underscore, e.g. 'Acetate_1.9' -> 'acetate'. Names without an underscore map
# to their own lower-cased form.
lower_names = {}
for name in sorted(pipeline.db.metabolites.keys()):
    lower_names[name] = name.partition('_')[0].lower()

# Unique lower-cased prefixes, in alphabetical order.
names = sorted(set(lower_names.values()))

In [6]:
# List each database entry (except TSP) next to its lower-cased prefix.
for name in sorted(pipeline.db.metabolites.keys()):
    if name == 'TSP':
        continue
    # One pre-formatted argument prints identically as a Python 2 statement and
    # as a Python 3 function call ("<name> <prefix>").
    print('%s %s' % (name, lower_names[name]))


Acetate_1.9 acetate
Alanine_1.5 alanine
Citrate_2.7 citrate
Glucose_5.3 glucose
Glutamine_3.8 glutamine
Glycine_3.6 glycine
Histidine_7.1 histidine
Isoleucine_1.0 isoleucine
Lactate_1.3 lactate
Leucine_1.0 leucine
Lysine_3.0 lysine
Phenylalanine_7.3 phenylalanine
TMAO_3.3 tmao
Tyrosine_7.2 tyrosine
Valine_1.1 valine

In [7]:
def natural_sort(l):
    """Return the strings in ``l`` sorted in natural (human) order.

    Runs of ASCII digits are compared by numeric value and all other text is
    compared case-insensitively, so ``'x_50'`` sorts before ``'x_1000'``.

    Args:
        l: iterable of strings.

    Returns:
        A new list with the items of ``l`` in natural order.
    """
    def alphanum_key(key):
        # Splitting on a capturing group always yields alternating chunks:
        # text, digits, text, ... (text chunks may be empty). Classify chunks
        # by position instead of str.isdigit(), which also accepts non-ASCII
        # digits that the [0-9]+ pattern never isolates; those would otherwise
        # turn a text chunk into an int (or make int() fail) and break the
        # text-vs-text / number-vs-number comparison between keys.
        chunks = re.split('([0-9]+)', key)
        return [int(chunk) if position % 2 else chunk.lower()
                for position, chunk in enumerate(chunks)]

    return sorted(l, key=alphanum_key)

In [8]:
base_dir = '/home/rstudio/NMR/calibrations/spiked_metabolites_combined/'
# Database entry name -> spiked concentrations, in the order they were loaded.
metabolite_concentrations = defaultdict(list)
for name in sorted(pipeline.db.metabolites.keys()):
    if name == 'TSP':
        continue
    # Single-argument print calls behave the same under Python 2 and Python 3.
    print('=======================================================')
    print('Loading %s' % name)
    print('=======================================================')

    # Every entry under base_dir whose name starts with the lower-cased
    # metabolite prefix, visited in natural order of the path string.
    ln = lower_names[name]
    paths = natural_sort(glob(os.path.join(base_dir, '%s*' % ln)))
    for input_spectra in paths:
        # The entry name is split on '_' and the second token is parsed as the
        # integer concentration; a name that does not fit raises here.
        my_dir = os.path.basename(os.path.normpath(input_spectra))
        tokens = my_dir.split('_')
        conc = int(tokens[1])
        pipeline.load_spiked(name, conc, input_spectra)
        metabolite_concentrations[name].append(conc)
    # Two blank lines between metabolites (print('') emits just a newline on
    # both Python versions, unlike print() under Python 2).
    print('')
    print('')


=======================================================
Loading Acetate_1.9
=======================================================
Loading spectra for Acetate_1.9 (50)
Loading spectra for Acetate_1.9 (100)
Loading spectra for Acetate_1.9 (250)
Loading spectra for Acetate_1.9 (500)
Loading spectra for Acetate_1.9 (1000)
Loading spectra for Acetate_1.9 (1750)
Loading spectra for Acetate_1.9 (2500)

=======================================================
Loading Alanine_1.5
=======================================================
Loading spectra for Alanine_1.5 (50)
Loading spectra for Alanine_1.5 (100)
Loading spectra for Alanine_1.5 (250)
Loading spectra for Alanine_1.5 (500)
Loading spectra for Alanine_1.5 (1000)
Loading spectra for Alanine_1.5 (1750)
Loading spectra for Alanine_1.5 (2500)

=======================================================
Loading Citrate_2.7
=======================================================
Loading spectra for Citrate_2.7 (50)
Loading spectra for Citrate_2.7 (100)
Loading spectra for Citrate_2.7 (250)
Loading spectra for Citrate_2.7 (500)
Loading spectra for Citrate_2.7 (1000)
Loading spectra for Citrate_2.7 (1750)
Loading spectra for Citrate_2.7 (2500)

=======================================================
Loading Glucose_5.3
=======================================================
Loading spectra for Glucose_5.3 (50)
Loading spectra for Glucose_5.3 (100)
Loading spectra for Glucose_5.3 (250)
Loading spectra for Glucose_5.3 (500)
Loading spectra for Glucose_5.3 (2500)
Loading spectra for Glucose_5.3 (5000)
Loading spectra for Glucose_5.3 (7500)
Loading spectra for Glucose_5.3 (10000)

=======================================================
Loading Glutamine_3.8
=======================================================
Loading spectra for Glutamine_3.8 (50)
Loading spectra for Glutamine_3.8 (100)
Loading spectra for Glutamine_3.8 (250)
Loading spectra for Glutamine_3.8 (500)
Loading spectra for Glutamine_3.8 (1000)
Loading spectra for Glutamine_3.8 (1750)
Loading spectra for Glutamine_3.8 (2500)

=======================================================
Loading Glycine_3.6
=======================================================
Loading spectra for Glycine_3.6 (50)
Loading spectra for Glycine_3.6 (100)
Loading spectra for Glycine_3.6 (250)
Loading spectra for Glycine_3.6 (500)
Loading spectra for Glycine_3.6 (1000)
Loading spectra for Glycine_3.6 (1750)
Loading spectra for Glycine_3.6 (2500)

=======================================================
Loading Histidine_7.1
=======================================================
Loading spectra for Histidine_7.1 (50)
Loading spectra for Histidine_7.1 (100)
Loading spectra for Histidine_7.1 (250)
Loading spectra for Histidine_7.1 (500)
Loading spectra for Histidine_7.1 (1000)
Loading spectra for Histidine_7.1 (1750)
Loading spectra for Histidine_7.1 (2500)

=======================================================
Loading Isoleucine_1.0
=======================================================
Loading spectra for Isoleucine_1.0 (50)
Loading spectra for Isoleucine_1.0 (100)
Loading spectra for Isoleucine_1.0 (250)
Loading spectra for Isoleucine_1.0 (500)
Loading spectra for Isoleucine_1.0 (1000)
Loading spectra for Isoleucine_1.0 (1750)
Loading spectra for Isoleucine_1.0 (2500)

=======================================================
Loading Lactate_1.3
=======================================================
Loading spectra for Lactate_1.3 (50)
Loading spectra for Lactate_1.3 (100)
Loading spectra for Lactate_1.3 (250)
Loading spectra for Lactate_1.3 (500)
Loading spectra for Lactate_1.3 (2500)
Loading spectra for Lactate_1.3 (5000)
Loading spectra for Lactate_1.3 (7500)
Loading spectra for Lactate_1.3 (10000)

=======================================================
Loading Leucine_1.0
=======================================================
Loading spectra for Leucine_1.0 (50)
Loading spectra for Leucine_1.0 (100)
Loading spectra for Leucine_1.0 (250)
Loading spectra for Leucine_1.0 (500)
Loading spectra for Leucine_1.0 (1000)
Loading spectra for Leucine_1.0 (1750)
Loading spectra for Leucine_1.0 (2500)

=======================================================
Loading Lysine_3.0
=======================================================
Loading spectra for Lysine_3.0 (50)
Loading spectra for Lysine_3.0 (100)
Loading spectra for Lysine_3.0 (250)
Loading spectra for Lysine_3.0 (500)
Loading spectra for Lysine_3.0 (1000)
Loading spectra for Lysine_3.0 (1750)
Loading spectra for Lysine_3.0 (2500)

=======================================================
Loading Phenylalanine_7.3
=======================================================
Loading spectra for Phenylalanine_7.3 (50)
Loading spectra for Phenylalanine_7.3 (100)
Loading spectra for Phenylalanine_7.3 (250)
Loading spectra for Phenylalanine_7.3 (500)
Loading spectra for Phenylalanine_7.3 (1000)
Loading spectra for Phenylalanine_7.3 (1750)
Loading spectra for Phenylalanine_7.3 (2500)

=======================================================
Loading TMAO_3.3
=======================================================
Loading spectra for TMAO_3.3 (50)
Loading spectra for TMAO_3.3 (100)
Loading spectra for TMAO_3.3 (250)
Loading spectra for TMAO_3.3 (500)
Loading spectra for TMAO_3.3 (1000)
Loading spectra for TMAO_3.3 (1750)
Loading spectra for TMAO_3.3 (2500)

=======================================================
Loading Tyrosine_7.2
=======================================================
Loading spectra for Tyrosine_7.2 (50)
Loading spectra for Tyrosine_7.2 (100)
Loading spectra for Tyrosine_7.2 (250)
Loading spectra for Tyrosine_7.2 (500)
Loading spectra for Tyrosine_7.2 (1000)
Loading spectra for Tyrosine_7.2 (1750)
Loading spectra for Tyrosine_7.2 (2500)

=======================================================
Loading Valine_1.1
=======================================================
Loading spectra for Valine_1.1 (50)
Loading spectra for Valine_1.1 (100)
Loading spectra for Valine_1.1 (250)
Loading spectra for Valine_1.1 (500)
Loading spectra for Valine_1.1 (1000)
Loading spectra for Valine_1.1 (1750)
Loading spectra for Valine_1.1 (2500)

3. Optimise model parameters


In [9]:
# TSP reference concentration passed to pipeline.update_rel_intensities().
# NOTE(review): units are not stated anywhere in this notebook — presumably the
# same units as the spiked concentrations parsed from the directory names; confirm.
tsp_concentration = 2320

In [10]:
# Start from the pipeline's database and thread it through one update per
# metabolite: the object returned for one entry is the input for the next.
db = pipeline.db
for name in sorted(db.metabolites.keys()):
    # TSP is never optimised, and entries without loaded standards are left alone.
    if name == 'TSP' or name not in metabolite_concentrations:
        continue
    # Per the original author's note, each call yields a new copy of the db
    # carrying the corrected relative intensity for this entry.
    db = pipeline.update_rel_intensities(
        db, name, metabolite_concentrations[name], tsp_concentration)


Updating relative intensities for Acetate_1.9
tsp_area=21169274.594343, metabo_area=83301.651326, correction=1.643267
tsp_area=21163049.743157, metabo_area=381744.020212, correction=3.766383
tsp_area=20762272.930678, metabo_area=894465.922444, correction=3.598151
tsp_area=22389479.049913, metabo_area=1939061.762983, correction=3.616664
tsp_area=15036920.049465, metabo_area=3389395.914378, correction=4.706455
tsp_area=15816829.311129, metabo_area=6145267.943835, correction=4.635684
tsp_area=15740019.339781, metabo_area=8613144.412149, correction=4.570324
Rel. intensities: initial = 3.000000, corrected = 3.766383

Updating relative intensities for Alanine_1.5
tsp_area=21065404.268182, metabo_area=244810.927051, correction=4.853125
tsp_area=20905811.900534, metabo_area=515435.085974, correction=5.147987
tsp_area=22137782.404933, metabo_area=1131497.528699, correction=4.268841
tsp_area=21989472.060571, metabo_area=2440849.261939, correction=4.635394
tsp_area=16476212.009087, metabo_area=4533154.528662, correction=5.744783
tsp_area=16795282.241834, metabo_area=7816143.653752, correction=5.552616
tsp_area=16314032.321757, metabo_area=11555763.425890, correction=5.915995
Rel. intensities: initial = 3.000000, corrected = 5.147987

Updating relative intensities for Citrate_2.7
tsp_area=20006127.636799, metabo_area=159860.783620, correction=3.336871
tsp_area=21036595.946455, metabo_area=331132.575445, correction=3.286676
tsp_area=21399900.702366, metabo_area=799340.991039, correction=3.119685
tsp_area=21821579.106803, metabo_area=1547433.448254, correction=2.961327
tsp_area=16131593.827955, metabo_area=2998060.814994, correction=3.880553
tsp_area=16732745.715022, metabo_area=1187979.462753, correction=0.847099
tsp_area=15792177.091668, metabo_area=7048659.840152, correction=3.727821
Rel. intensities: initial = 2.000000, corrected = 3.286676

Updating relative intensities for Glucose_5.3
tsp_area=20271973.300762, metabo_area=67927.136097, correction=1.399290
tsp_area=21455707.318879, metabo_area=87842.459908, correction=0.854854
tsp_area=21371007.726240, metabo_area=107596.118335, correction=0.420496
tsp_area=22441264.142817, metabo_area=348584.525725, correction=0.648666
tsp_area=15993976.949257, metabo_area=1709084.341291, correction=0.892478
tsp_area=15036920.049465, metabo_area=3107982.451163, correction=0.863138
tsp_area=15816829.311129, metabo_area=4475227.843094, correction=0.787707
tsp_area=15740019.339781, metabo_area=5561836.032637, correction=0.737808
Rel. intensities: initial = 0.660000, corrected = 0.821281

Updating relative intensities for Glutamine_3.8
tsp_area=20629190.812168, metabo_area=899.311529, correction=0.018205
tsp_area=21815830.830403, metabo_area=69581.537328, correction=0.665967
tsp_area=21432230.432756, metabo_area=522172.766298, correction=2.034873
tsp_area=21927606.206229, metabo_area=819688.036716, correction=1.561054
tsp_area=16287710.482878, metabo_area=1454972.932895, correction=1.865200
tsp_area=15867293.535904, metabo_area=2240004.130018, correction=1.684374
tsp_area=16039119.634238, metabo_area=3017338.905128, correction=1.571209
Rel. intensities: initial = 1.000000, corrected = 1.571209

Updating relative intensities for Glycine_3.6
tsp_area=20314458.746299, metabo_area=137808.539561, correction=2.832901
tsp_area=21351453.989867, metabo_area=277024.766938, correction=2.709079
tsp_area=21576494.850610, metabo_area=648349.826036, correction=2.509684
tsp_area=21566958.039434, metabo_area=1388151.711767, correction=2.687872
tsp_area=16693831.623790, metabo_area=2702844.305803, correction=3.380613
tsp_area=16110560.625698, metabo_area=4597357.402636, correction=3.404788
tsp_area=16352890.963761, metabo_area=6324947.244208, correction=3.230374
Rel. intensities: initial = 2.000000, corrected = 2.832901

Updating relative intensities for Histidine_7.1
tsp_area=22026686.763030, metabo_area=75187.613816, correction=1.425468
tsp_area=21372044.991168, metabo_area=108117.894817, correction=1.056287
tsp_area=21260906.222902, metabo_area=378195.955405, correction=1.485681
tsp_area=21158040.988831, metabo_area=736222.110001, correction=1.453095
tsp_area=16131593.827955, metabo_area=1398414.996888, correction=1.810045
tsp_area=16732745.715022, metabo_area=557709.539729, correction=0.397680
tsp_area=15792177.091668, metabo_area=3360353.566337, correction=1.777188
Rel. intensities: initial = 1.000000, corrected = 1.453095

Updating relative intensities for Isoleucine_1.0
tsp_area=20132738.862848, metabo_area=253602.616101, correction=5.260310
tsp_area=21675884.319460, metabo_area=481008.398503, correction=4.633470
tsp_area=22358307.812965, metabo_area=1055561.849633, correction=3.943077
tsp_area=22181542.825614, metabo_area=2531291.580011, correction=4.765527
tsp_area=16436612.934618, metabo_area=5010032.869008, correction=6.364419
tsp_area=16807662.917803, metabo_area=7781826.206614, correction=5.524165
tsp_area=16525281.337019, metabo_area=11500389.861933, correction=5.812383
Rel. intensities: initial = 3.000000, corrected = 5.260310

Updating relative intensities for Lactate_1.3
tsp_area=20663586.629899, metabo_area=763683.968404, correction=15.433643
tsp_area=21454399.043847, metabo_area=688918.489242, correction=6.704741
tsp_area=22046434.265542, metabo_area=1246001.839902, correction=4.720313
tsp_area=23248794.757716, metabo_area=2382692.094021, correction=4.279844
tsp_area=16017682.084730, metabo_area=7802887.148136, correction=4.068611
tsp_area=16287710.482878, metabo_area=16433555.942674, correction=4.213393
tsp_area=15867293.535904, metabo_area=26983247.111522, correction=4.734352
tsp_area=16039119.634238, metabo_area=31642454.318881, correction=4.119269
Rel. intensities: initial = 3.000000, corrected = 4.500079

Updating relative intensities for Leucine_1.0
tsp_area=21674106.751773, metabo_area=554543.482537, correction=10.684517
tsp_area=22271926.861013, metabo_area=989606.300796, correction=9.277590
tsp_area=22586047.727943, metabo_area=2712884.897515, correction=10.031863
tsp_area=21906986.942892, metabo_area=5428867.015483, correction=10.348730
tsp_area=16088509.625630, metabo_area=9239206.248669, correction=11.990833
tsp_area=16391587.091842, metabo_area=15287436.917165, correction=11.127718
tsp_area=16817279.307300, metabo_area=21602131.405481, correction=10.728311
Rel. intensities: initial = 6.000000, corrected = 10.684517

Updating relative intensities for Lysine_3.0
tsp_area=21450345.856254, metabo_area=185786.606325, correction=3.616934
tsp_area=23600421.734215, metabo_area=299422.350727, correction=2.649079
tsp_area=22144380.938067, metabo_area=761104.754506, correction=2.870591
tsp_area=21192153.831692, metabo_area=1590136.963674, correction=3.133429
tsp_area=16693831.623790, metabo_area=2548865.174674, correction=3.188022
tsp_area=16110560.625698, metabo_area=4537452.213669, correction=3.360422
tsp_area=16352890.963761, metabo_area=6371080.738131, correction=3.253936
Rel. intensities: initial = 2.000000, corrected = 3.188022

Updating relative intensities for Phenylalanine_7.3
tsp_area=21064525.239070, metabo_area=144022.971249, correction=2.855227
tsp_area=18168838.639971, metabo_area=243861.123073, correction=2.802502
tsp_area=22367604.546046, metabo_area=678722.106395, correction=2.534329
tsp_area=21892016.658988, metabo_area=1401443.430308, correction=2.673316
tsp_area=15483410.115918, metabo_area=2541349.763218, correction=3.427112
tsp_area=16153306.499148, metabo_area=4322967.445268, correction=3.193103
tsp_area=16145787.301548, metabo_area=6495188.546534, correction=3.359874
Rel. intensities: initial = 2.000000, corrected = 2.855227

Updating relative intensities for TMAO_3.3
tsp_area=20709568.513816, metabo_area=701288.270825, correction=14.141192
tsp_area=21216413.775432, metabo_area=1351842.222269, correction=13.304070
tsp_area=21552679.285145, metabo_area=3260062.357217, correction=12.633251
tsp_area=21459777.048782, metabo_area=7187310.942518, correction=13.986264
tsp_area=16436612.934618, metabo_area=13065520.580999, correction=16.597584
tsp_area=16807662.917803, metabo_area=21158328.406743, correction=15.019880
tsp_area=16525281.337019, metabo_area=32837336.366439, correction=16.596234
Rel. intensities: initial = 9.000000, corrected = 14.141192

Updating relative intensities for Tyrosine_7.2
tsp_area=21277623.271033, metabo_area=179622.718089, correction=3.525321
tsp_area=19087208.933404, metabo_area=325201.232549, correction=3.557462
tsp_area=21923561.372475, metabo_area=896361.615601, correction=3.414779
tsp_area=20996570.209074, metabo_area=1605786.713803, correction=3.193743
tsp_area=16079397.592313, metabo_area=1125750.213324, correction=1.461850
tsp_area=15981203.263206, metabo_area=1248153.364303, correction=0.931861
tsp_area=16048178.133035, metabo_area=1257218.908510, correction=0.654298
Rel. intensities: initial = 2.000000, corrected = 3.193743

Updating relative intensities for Valine_1.1
tsp_area=21802263.891179, metabo_area=233907.365935, correction=4.480256
tsp_area=21918515.873559, metabo_area=433501.095508, correction=4.129615
tsp_area=21616191.658158, metabo_area=1131801.400414, correction=4.373021
tsp_area=22871206.407214, metabo_area=2377003.317391, correction=4.340115
tsp_area=16079397.592313, metabo_area=11038538.427548, correction=14.334162
tsp_area=15981203.263206, metabo_area=43428.492056, correction=0.032423
tsp_area=16048178.133035, metabo_area=10929877.242705, correction=5.688268
Rel. intensities: initial = 3.000000, corrected = 4.373021


In [11]:
# Render the full database table held by the updated db, so the corrected
# rel_intensity values can be inspected before saving.
display(db.df)


name ppm start end couple_code j_constant rel_intensity enabled note
0 TSP 0.000 -0.050 0.0500 0 0 9.000000 Y NaN
1 Acetate_1.9 1.932 1.920 1.9500 0 0 3.766383 Y NaN
2 Alanine_1.5 1.495 1.450 1.5400 1 7.21 5.147987 Y NaN
3 Alanine_3.8 3.803 3.790 3.8150 3 7.3 1.000000 N NaN
4 Citrate_2.6 2.558 2.530 2.5900 1 15.1 2.000000 N NaN
5 Citrate_2.7 2.675 2.650 2.7000 1 15.08 3.286676 Y NaN
6 Glucose_3.6 3.558 3.535 3.5800 1,1 9.91, 3.83 1.000000 N NaN
7 Glucose_5.3 5.256 5.240 5.2750 1 3.89 0.821281 Y NaN
8 Glutamine_3.8 3.780 3.750 3.8100 2 6.18 1.571209 Y NaN
9 Glycine_3.6 3.577 3.560 3.5900 0 0 2.832901 Y NaN
10 Histidine_3.1 3.130 3.090 3.1700 1,1 7.87, 7.70 1.000000 N NaN
11 Histidine_4.0 3.985 3.950 4.0100 1,1 4.94, 3.19 1.000000 N NaN
12 Histidine_7.1 7.080 7.050 7.1250 0 0 1.453095 Y NaN
13 Histidine_7.8 7.785 7.760 7.8750 0 0 1.000000 N NaN
14 Isoleucine_0.9 0.953 0.925 0.9800 2 7.5 3.000000 N NaN
15 Isoleucine_1.0 1.024 1.000 1.0500 1 7.02 5.260310 Y NaN
16 Isoleucine_3.7 3.685 3.665 3.6950 1 4.19 1.000000 N NaN
17 Lactate_1.3 1.343 1.330 1.3600 1 6.8 4.500079 Y NaN
18 Lactate_4.1 4.126 4.100 4.1500 3 6.7 1.000000 N NaN
19 Leucine_1.0 0.976 0.950 1.0000 2 6.3 10.684517 Y NaN
20 Lysine_3.0 3.049 3.020 3.0800 2 7.75 3.188022 Y NaN
21 Lysine_3.8 3.766 3.740 3.7800 2 6.3 1.000000 N NaN
22 Threonine_1.3 1.343 1.325 1.3600 1 6.5 3.000000 N NaN
23 Threonine_3.6 3.597 3.570 3.6000 1 4.85 1.000000 N NaN
24 TMAO_3.3 3.278 3.260 3.2950 0 0 14.141192 Y NaN
25 Tyrosine_3.1 3.063 3.025 3.0950 1,1 8.05, 6.94 1.000000 N NaN
26 Tyrosine_6.9 6.917 6.890 6.9500 1 8.69 2.000000 N NaN
27 Tyrosine_7.2 7.212 7.190 7.2400 1 8.58 3.193743 Y NaN
28 Phenylalanine_3.1 3.134 3.090 3.1700 1,1 8.10, 6.56 2.000000 N NaN
29 Phenylalanine_4.0 4.005 3.960 4.0200 1,1 5.24, 2.54 1.000000 N NaN
30 Phenylalanine_7.3 7.349 7.320 7.3700 1 7.46 2.855227 Y NaN
31 Valine_1.0 1.005 0.990 1.0225 1 6.99 3.000000 N NaN
32 Valine_1.1 1.056 1.040 1.0750 1 7.04 4.373021 Y NaN
33 Valine_3.6 3.625 3.605 3.6400 1 4.48 1.000000 N NaN

4. Save the DB


In [12]:
# Write the updated table to CSV without the row index (index=False).
# NOTE(review): absolute, machine-specific output path; an existing file at this
# location is presumably overwritten — confirm this is intended.
db.df.to_csv('/home/rstudio/codes/databases/default_optimised_db_combined.csv', index=False)