This workflow was conceived by Louis-Felix, Ivan Protsiuc, and Kai Durkhop. It has been implemented in Jupyter by Madeleine and Ricardo. Documentation for the GNPS Trinity workflow can be found here: https://bix-lab.ucsd.edu/display/Public/GNPS+Trinity+workflow
load libraries
In [11]:
library(plyr)
load feature quantification matrix from optimus
In [12]:
# Read the feature quantification matrix: comma-separated, "." as the decimal
# mark, with the first line used as column names.
opt <- read.csv(
  "features_quantification_matrix.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)
transpose
In [13]:
# Transpose the quantification matrix so that every original column (except
# the first) becomes a row and every original row becomes a column named after
# its "Sample.name" value. Two leading columns are then added:
#   shared.name - running index 1..n of the transposed rows
#   row.ID      - the original column name of that row
if (!("Sample.name" %in% colnames(opt))) {
  stop("Column 'Sample.name' not found in the quantification matrix.",
       call. = FALSE)
}

# The first column is dropped before transposing.
# NOTE(review): this presumably is the sample-name column -- confirm against
# the layout of the quantification file.
opt_num <- as.data.frame(t(opt[, -1]))

# as.character() so that a factor-typed sample column yields its labels.
colnames(opt_num) <- as.character(opt[["Sample.name"]])

opt_num <- cbind(rownames(opt_num), opt_num)
colnames(opt_num)[1] <- "row.ID"

# seq_len() rather than 1:nrow(): for an empty table 1:0 would be c(1, 0) and
# cbind() would fail on the length mismatch.
opt_num <- cbind(seq_len(nrow(opt_num)), opt_num)
colnames(opt_num)[1] <- "shared.name"
rownames(opt_num) <- seq_len(nrow(opt_num))
import molecular formula file from Sirius
In [14]:
# Size the table by the widest line of the tab-separated file; fill = TRUE pads
# lines that have fewer fields. The file is read without a header row.
no_col <- max(count.fields("SiriusMF.csv", sep = "\t"))
SirMF <- read.table(
  "SiriusMF.csv",
  header = FALSE,
  sep = "\t",
  col.names = 1:no_col,
  fill = TRUE
)
name columns
In [15]:
# Name the columns of the Sirius table: the first three are FeatureID, mz and
# adduct; every following pair of columns is named MF<k> / Score<k>
# (MF1, Score1, MF2, Score2, ...).
if (ncol(SirMF) < 3) {
  stop("SirMF must have at least 3 columns (FeatureID, mz, adduct).",
       call. = FALSE)
}

# Number of columns after the three fixed ones.
n_extra <- ncol(SirMF) - 3

# Number of MF/Score pairs. ceiling() keeps this a whole number even when the
# widest line ends with an unpaired field (odd n_extra).
nMF <- ceiling(n_extra / 2)

colnames(SirMF)[1:3] <- c("FeatureID", "mz", "adduct")

# Guarded because 4:ncol(SirMF) would count backwards (4, 3) when there are no
# extra columns; truncating to n_extra avoids recycled, duplicated names when
# n_extra is odd.
if (n_extra > 0) {
  pair_names <- paste0(
    rep(c("MF", "Score"), times = nMF),
    rep(seq_len(nMF), each = 2)
  )
  colnames(SirMF)[3 + seq_len(n_extra)] <- pair_names[seq_len(n_extra)]
}
Add SMILES structures etc. from separate files
In [16]:
# For every row of SirMF whose FeatureID has a matching "<FeatureID>.csv" file
# in the CSIFingerID directory, read that tab-separated file into the list
# `smiles`. The table is stored at the row's index and named by the FeatureID;
# rows without a file leave their slot empty (NULL).

# Only consider *.csv files, and strip exactly the trailing ".csv"
# (the unanchored pattern ".csv" would also match e.g. "xcsv" mid-name).
filenames <- list.files("CSIFingerID", pattern = "\\.csv$")
filenames <- sub("\\.csv$", "", filenames)

smiles <- list()
# seq_len() so that an empty SirMF simply skips the loop.
for (i in seq_len(nrow(SirMF))) {
  # Use the character form for both the lookup and the list name, so a
  # factor-typed FeatureID is not silently replaced by its integer code.
  feature_id <- as.character(SirMF$FeatureID[i])
  if (feature_id %in% filenames) {
    smiles[[i]] <- read.table(
      file.path("CSIFingerID", paste0(feature_id, ".csv")),
      sep = "\t", header = TRUE, fill = TRUE, quote = ""
    )
    names(smiles)[i] <- feature_id
  }
}
Add SMILES to the Sirius molecular formula table
In [17]:
# Collapse every per-feature table in `smiles` into a single row (the values
# of each column joined with ","), stack those rows into `df`, and merge `df`
# with the molecular formula table SirMF by FeatureID (full outer merge).

# Drop the empty (NULL) slots.
smiles <- smiles[!vapply(smiles, is.null, logical(1))]

if (length(smiles) == 0) {
  # No per-feature table was loaded, so there is nothing to add: the combined
  # table is just the molecular formula table. (The former 1:length(smiles)
  # loop and smiles[[1]] lookup would fail here.)
  colap <- list()
  SirComb <- SirMF
} else {
  # matrix(unlist(...), byrow = TRUE) below only lines up if every table has
  # the same number of columns; fail loudly instead of misaligning values.
  n_fields <- vapply(smiles, ncol, integer(1))
  if (length(unique(n_fields)) != 1) {
    stop("The tables in 'smiles' do not all have the same number of columns.",
         call. = FALSE)
  }

  # One character vector per feature: each column collapsed to one string.
  colap <- unname(lapply(smiles, function(tbl) {
    vapply(
      tbl,
      function(column) paste(column, collapse = ","),
      character(1),
      USE.NAMES = FALSE
    )
  }))

  df <- data.frame(matrix(unlist(colap), nrow = length(colap), byrow = TRUE))
  rownames(df) <- names(smiles)
  # Column names are taken from the first table.
  colnames(df) <- colnames(smiles[[1]])
  colnames(df)[colnames(df) == "name"] <- "CompoundName"

  # Expose the list names (feature IDs) as the merge key column.
  df <- cbind(rownames(df), df)
  colnames(df)[1] <- "FeatureID"

  SirComb <- merge(SirMF, df, by = "FeatureID", all = TRUE)
}
combine data from Sirius with Optimus output
In [18]:
# Derive a feature ID from each row.ID: remove everything up to and including
# the last "ID", then strip all remaining dots.
FeatureIDs <- gsub(
  "[.]", "",
  gsub(".*ID", "", opt_num[, colnames(opt_num) == "row.ID"])
)

# Prepend the IDs as the "FeatureID" key column.
opt_num <- cbind(FeatureID = FeatureIDs, opt_num)

# Attach the columns of SirComb to the rows of opt_num, matching on FeatureID.
optMF <- join(opt_num, SirComb, by = "FeatureID")

# Move "shared.name" to the front; all other columns keep their order.
is_shared <- colnames(optMF) == "shared.name"
optMF <- optMF[, c(which(is_shared), which(!is_shared))]
write output file
In [19]:
# Write the combined table as a semicolon-separated file, without row names
# and without quoting any field.
write.table(
  optMF,
  file = "Trinity_output.csv",
  quote = FALSE,
  sep = ";",
  row.names = FALSE
)
In [ ]: