In [1]:
generate_blank_summary_file <- function(path) {
output = '### Peak Detection Parameters
peak_picking\tcentWave
ppm
peak_width
prefilter
### Peak-grouping parameters
bw
mzwid
minfrac\t0
minsamp\t2
### retention-correction parameters
retcor_method\tloess
missing
extra'
params_file = paste(path, '/xcms_params.tab', sep='')
write(output, file=params_file)
}
generate_blank_summary_file(getwd())
In [2]:
# Parse a tab-delim summary file
char_to_numeric <- function(char) {
# Take in a space-delimited character that represents
# a number or vector. Convert it to numeric.
numeric <- as.numeric(strsplit(char, " ")[[1]])
return(numeric)
}
get_params <- function(path, char_to_numeric) {
# INPUT: tab-delim file of
# space-delimited values
# OUTPUT: A labeled list with each label from the first column
# of the file. Each value, character or numeric, from the second column
# It will accept vectors as space-delimited entries in the second column
# of the text file
df <- read.table(path, sep='\t', header=FALSE, row.names=1,
colClasses="character", blank.lines.skip=TRUE, fill=TRUE)
print(df)
print(dim(df))
# There should only be one column of values
if (dim(df)[2] > 1){
stop(paste("Your summary file should only have 1 (tab-delimited) column of values.",
sprintf("it has %s columns", dim(df)[2])))
}
print('read table output:')
print(paste('Df dimensions', dim(df)))
#lst_params <- setNames(split(df, seq(nrow(df))), rownames(df))
# Make an empty list to gather param values
lst = setNames(vector("list", dim(df)[1]), rownames(df))
for (idx in rownames(df)){
print(df[idx,])
print(sprintf('----------- %s', df[idx,]))
if (df[idx,] == "") {
stop(paste(sprintf('The parameter %s is missing!', idx),
'Please set it, or use one of the preset parameter settings'))
}
# if no alphabetic characters, convert character to numeric
digit = grepl("[[:digit:]]", df[idx,])
alpha = grepl("[[:alpha:]]", df[idx,])
if (digit &&! alpha) {
sprintf('%s is a number(s)', df[idx,])
print(class(df[idx,]))
lst[[idx]] = char_to_numeric(df[idx,])
}
else if (alpha &&! digit){
lst[[idx]] = df[idx,]
}
}
return(lst)
}
path_txt = paste('/home/irockafe/',
'Dropbox (MIT)/Alm_Lab/projects/',
'revo_healthcare/notebooks/playing_around/',
'summary_file.txt', sep='')
path_relative = ('summary_file.txt')
params = get_params(path_relative, char_to_numeric)
params
class(params)
class(params$method)
class(params$prefilter)
In [3]:
if (1 == 2){
print('poop')
} else if(1==1){
print('yayyy')
}
In [4]:
# Get default settings from user input string
default_params = function(string) {
# Default xcms parameters for various MS and LC combinations
# Taken from "Meta-analysis of untargeted metabolomic
# data from multiple profiling experiments", Patti et al
# RETURN [param_name1=param_value1, param_name2=param_value2...]
# Define the preset values as nested list, indexed on
# as [LC-MS[ppm, peakwidth, bw, mzwid, prefilter]]
default_params = list(
hplc_qtof = list(ppm=30, peakwidth=c(10,60), bw=5,
mzwid=0.025, prefilter=c(0,0)),
hplc_qtof_hires = list(ppm=15, peakwidth=c(10,60), bw=5,
mzwid=0.015, prefilter=c(0,0)),
hplc_orbitrap = list(ppm=2.5, peakwidth=c(10,60), bw=5,
mzwid=0.015, prefilter=c(3,5000)),
uplc_qtof = list(ppm=30, peakwidth=c(5,20), bw=2,
mzwid=0.025, prefilter=c(0,0)),
uplc_qtof_hires = list(ppm=15, peakwidth=c(5,20), bw=2,
mzwid=0.015, prefilter=c(0,0)),
uplc_orbitrap = list(ppm=2.5, peakwidth=c(5,20), bw=2,
mzwid=0.015, prefilter=c(3,5000))
)
# If your string is found in the default list,
# return the default params
if (exists(string, where=default_params)) {
# double brackets because it removes the name of
# the top-level nested-list (i.e. uplc_orbitrap$ppm becomes
# simply $ppm when you use double brackets )
params = default_params[[string]]
return(params)
}
else{
msg = sprintf(paste("I couldn't understand your input.",
"The allowed inputs are: %s"),
paste(names(default_params), collapse=', '))
stop(msg)
}
}
preset_params = default_params('hplc_qtof')
preset_params
default_params('asfdasfdasd')
In [5]:
NULL
print(is.null(NULL))
In [6]:
# TODO: Adapt xcms_script to accept parameters
# ??? How to take a list of {key:value} from r and
# define parameters from it? Use default arguments for everything,
# then overwrite them if present?
# Could overwrite with default params
# use minsample exclusively to determine how many samples
# a peak must be present in to work
run_xcms = function(data_dir, params, polarity_mode,
# peak detection params
detection_method='centWave', ppm=25,
peak_width=c(10,60), prefilter=c(3,100),
# grouping parameters
bw=20, mzwid=0.025, minfrac=0, minsamp=2,
# retention-correction parameters
retcor_method='loess',
missing=1, extra=1
)
{
# check if param_value in user-defined list
# escape spaces in data_dir path
output_dir = getwd()
xcms_feature_table = "xcms_result"
camera_feature_table = "xcms_camera_results.csv"
nSlaves=0
### change working directory to your files, see +++ section
setwd(data_dir)
# wrtie parameters to file
peak_width_str = paste(paste(peak_width), collapse=" ")
prefilter_str = paste(paste(prefilter), collapse=" ")
param_string = sprintf('### Peak Detection Parameters
peak_picking\t%s
ppm\t%i
peak_width\t%s
prefilter\t%s
### Peak-grouping parameters
bw\t%i
mzwid\t%.4f
minfrac\t%i
minsamp\t%i
### retention-correction parameters
retcor_method\t%s
missing\t%i
extra\t%i
',
detection_method, ppm, peak_width_str, prefilter_str,
bw, mzwid, minfrac, minsamp,
retcor_method, missing, extra)
params_file = paste(output_dir, '/xcms_params.tab', sep='')
write(param_string, file=params_file)
stop("Don't want to actually run xcms when testin")
# Load packages
library(xcms)
library(CAMERA)
library(snowfall)
# Detect peaks
xset <- xcmsSet(method=detection_method, ppm=ppm, prefilter=prefilter,
peakwidth=peak_width, )
print("finished peak Detection!")
# Group peaks together across samples
# save the detected peaks in case downstream processing fails
# and you have to go in manually to figure out parameters
# TODO - should the bw parameter here be changed depending
# on the platform used (i.e. bw=30 is default, but for uplc,
# maybe the coarse grouping should be bw=15 or something)
xset <- group(xset)
print("finished first group command!")
print(xset)
saveRDS(xset, paste(output_dir, '/xset.Rdata', sep=''))
# Try to retention-correct
xset2 <- retcor(xset, method=retcor_method,
family="s", plottype="m",
missing=missing)
print("finished retcor!")
# regroup after retention time correction
# Base grouping on minimum number of samples, not fraction.
xset2 <- group(xset2, bw =bw, mzwid=mzwid,
minfrac=0, minsamp=minsamp)
saveRDS(xset2, paste(output_dir, '/xset2.Rdata', sep=''))
print("finished second group command!")
# Fill in peaks that weren't detected
xset3 <- fillPeaks(xset2)
print("Finished filling peaks!")
print(xset3)
# Move back to output_dir and write out your feature table
setwd(output_dir)
xcms_peaklist = peakTable(xset3, filebase=xcms_feature_table)
write.csv(xcms_peaklist, paste(xcms_feature_table,ppm, sep='_'))
print("Finished xcms!")
}
# Make paths work!
mzml_path = '/home/irockafe/Dropbox (MIT)/Alm_Lab/projects/revo_healthcare/notebooks/playing_around/sample_data'
mzml_path = gsub(' ', '\ ', mzml_path, fixed=TRUE)
mzml_path
print(paste('Working dir: ', getwd()))
print(mzml_path)
setwd(mzml_path)
print(sprintf('Working dir now: %s', getwd()))
setwd(mzml_path)
#run_xcms(mzml_path, 'positive')
In [9]:
preset_params
preset_params$asdf
In [21]:
get_params('xcms_params.tab')