GNPS TRINITY - Combine Optimus (feature detection) and Sirius (in silico annotation) with GNPS (molecular networking)

This workflow was conceived by Louis-Felix, Ivan Protsiuc, and Kai Durkhop. It has been implemented in Jupyter by Madeleine and Ricardo. Documentation for the GNPS Trinity workflow can be found here: https://bix-lab.ucsd.edu/display/Public/GNPS+Trinity+workflow

load libraries


In [11]:
library(plyr)

Load the feature quantification matrix from Optimus


In [12]:
# Read the Optimus feature quantification matrix into `opt`: comma-separated
# fields, "." as decimal mark, first line used as column names.
opt <- read.csv(
  file = "features_quantification_matrix.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)

transpose


In [13]:
# Reshape the Optimus matrix so that every feature becomes a row and every
# sample a column, then prepend two identifier columns: "shared.name" (running
# index) and "row.ID" (the feature's original column name in `opt`).

# Locate the sample-name column by name, so that the column removed before
# transposing and the column supplying the sample labels are the same one.
sample_col <- which(colnames(opt) == "Sample.name")
if (length(sample_col) != 1) {
  stop("expected exactly one 'Sample.name' column in the Optimus matrix",
       call. = FALSE)
}

# drop = FALSE keeps a data frame even when a single feature column remains;
# without it the selection collapses to a bare vector and t() can no longer
# carry the feature's column name over as a row name.
opt_num <- t(opt[, -sample_col, drop = FALSE])
opt_num <- as.data.frame(opt_num)
colnames(opt_num) <- as.character(opt[, sample_col])

# Original feature labels (row names after transposing) become "row.ID".
opt_num <- cbind(rownames(opt_num), opt_num)
colnames(opt_num)[1] <- "row.ID"

# seq_len() instead of 1:n so that an empty table yields an empty index.
opt_num <- cbind(seq_len(nrow(opt_num)), opt_num)
colnames(opt_num)[1] <- "shared.name"
rownames(opt_num) <- seq_len(nrow(opt_num))

import molecular formula file from Sirius


In [14]:
# Import the Sirius molecular formula table (tab-separated, no header line).
# The widest line of the file determines the number of columns; fill = TRUE
# pads lines that have fewer fields.
no_col <- max(count.fields("SiriusMF.csv", sep = "\t"))
SirMF <- read.table(
  file = "SiriusMF.csv",
  header = FALSE,
  sep = "\t",
  col.names = seq_len(no_col),
  fill = TRUE
)

name columns


In [15]:
# Label the columns of the Sirius table: the first three columns are named
# "FeatureID", "mz" and "adduct"; every following pair of columns is named
# MF<k> / Score<k> with k = 1, 2, ...
if (ncol(SirMF) < 3) {
  stop("the Sirius table needs at least the three columns ",
       "FeatureID, mz and adduct", call. = FALSE)
}

# Number of MF/Score pairs (same definition as before).
nMF <- (ncol(SirMF) - 3) / 2

# Build the names from the actual number of extra columns instead of from nMF:
# with zero extra columns 4:ncol(SirMF) and 1:nMF would count downwards, and
# with an odd number the names would be recycled into duplicates.
n_extra <- ncol(SirMF) - 3
if (n_extra > 0) {
  if (n_extra %% 2 != 0) {
    warning("the Sirius table has an odd number of MF/Score columns; ",
            "the last column is labelled as an MF without a Score",
            call. = FALSE)
  }
  colnames(SirMF)[4:ncol(SirMF)] <- paste0(
    rep(c("MF", "Score"), length.out = n_extra),
    rep(seq_len(ceiling(n_extra / 2)), each = 2, length.out = n_extra)
  )
}
colnames(SirMF)[1:3] <- c("FeatureID", "mz", "adduct")

Add SMILES structures etc. from separate files


In [16]:
# Read the per-feature result table from the "CSIFingerID" directory for every
# feature listed in SirMF that has a file named "<FeatureID>.csv" there.
# `smiles` is a list with one slot per row of SirMF, named by feature ID;
# slots of features without a file stay NULL.

# Strip only a trailing ".csv" extension. The pattern is escaped and anchored:
# the previous ".csv" matched any character followed by "csv" anywhere in the
# file name.
filenames <- list.files("CSIFingerID")
filenames <- sub("\\.csv$", "", filenames)

feature_ids <- as.character(SirMF$FeatureID)
smiles <- vector("list", length(feature_ids))
names(smiles) <- feature_ids

# seq_along() so that an empty SirMF simply skips the loop.
for (i in seq_along(feature_ids)) {
  if (feature_ids[i] %in% filenames) {
    # comment.char = "" is essential: read.table() treats "#" as the start of
    # a comment by default, but "#" is the triple-bond symbol in SMILES, so
    # such rows would be cut off and silently padded with NA by fill = TRUE.
    smiles[[i]] <- read.table(
      file.path("CSIFingerID", paste0(feature_ids[i], ".csv")),
      sep = "\t",
      header = TRUE,
      fill = TRUE,
      quote = "",
      comment.char = ""
    )
  }
}

Add SMILES to the Sirius molecular formula table


In [17]:
# Collapse every per-feature table in `smiles` to a single row (all values of
# a column joined by ","), stack these rows into one data frame keyed by
# feature ID and merge it onto the Sirius molecular formula table.

# Keep only the features for which a table was actually read.
smiles <- smiles[!vapply(smiles, is.null, logical(1))]

if (length(smiles) == 0) {
  # No per-feature tables at all: there is nothing to merge, so the combined
  # table is the Sirius table itself. (1:length(smiles) used to fail here.)
  colap <- list()
  df <- data.frame(FeatureID = character(0))
  SirComb <- SirMF
} else {
  # One character vector per feature: each column collapsed to one string.
  colap <- lapply(smiles, function(tab) {
    vapply(
      seq_len(ncol(tab)),
      function(j) paste(tab[, j], collapse = ","),
      character(1)
    )
  })

  # The row-wise matrix fill below is only valid when every table has the
  # same number of columns; otherwise values would shift into wrong columns.
  if (length(unique(lengths(colap))) != 1) {
    stop("the per-feature tables do not all have the same number of columns",
         call. = FALSE)
  }

  df <- data.frame(
    matrix(unlist(colap, use.names = FALSE), nrow = length(colap), byrow = TRUE)
  )
  rownames(df) <- names(smiles)
  colnames(df) <- colnames(smiles[[1]])
  colnames(df)[which(colnames(df) == "name")] <- "CompoundName"

  # Expose the feature ID (so far only a row name) as the merge key column.
  df <- cbind(rownames(df), df)
  colnames(df)[1] <- "FeatureID"

  # all = TRUE keeps features that appear in only one of the two tables.
  SirComb <- merge(SirMF, df, by = "FeatureID", all = TRUE)
}

combine data from Sirius with Optimus output


In [18]:
# Derive the bare feature ID from each "row.ID" label: remove everything up to
# and including the last "ID", then remove all dots.
FeatureIDs <- gsub(
  "[.]", "",
  gsub(".*ID", "", opt_num[, colnames(opt_num) == "row.ID"])
)

# Put the derived IDs in front of the table as the join key "FeatureID".
opt_num <- cbind(FeatureID = FeatureIDs, opt_num)

# Left join (plyr defaults spelled out): every row of opt_num is kept and the
# matching Sirius columns are appended.
optMF <- join(opt_num, SirComb, by = "FeatureID", type = "left", match = "all")

# Move "shared.name" to the front. order() on the logical vector sorts FALSE
# (the "shared.name" column) first and leaves all other columns in place.
optMF <- optMF[, order(colnames(optMF) != "shared.name")]

write output file


In [19]:
# Write the combined table to "Trinity_output.csv": semicolon-separated,
# no row names, no quoting of values.
write.table(
  x = optMF,
  file = "Trinity_output.csv",
  sep = ";",
  quote = FALSE,
  row.names = FALSE
)

In [ ]: