In [ ]:
# Run the installer script hosted by Sage Bionetworks; it presumably defines
# pkgInstall(), which is called on the next line -- TODO confirm.
source("http://depot.sagebase.org/CRAN.R")
# Install the Synapse R client, then attach it for the rest of the session.
pkgInstall("synapseClient")
library(synapseClient)
Use your own credentials to log in
In [ ]:
# "user" and "passwd" are placeholders: substitute your own Synapse credentials.
# NOTE(review): rememberMe = TRUE presumably caches the login for later
# sessions -- confirm against the synapseClient documentation.
synapseLogin(username = "user", password = "passwd", rememberMe = TRUE)
Once logged in, you can download data by synapse ID
In [ ]:
# Fetch the gene-count file by Synapse ID into the working directory, then
# read the downloaded file from the path recorded on the returned object.
# stringsAsFactors is spelled FALSE (not F) because F is an ordinary,
# reassignable variable.
geneCounts.Formal <- synGet("syn4650257", downloadLocation = "./")
geneCounts <- read.delim(geneCounts.Formal@filePath, stringsAsFactors = FALSE)

# Same procedure for the table stored as `anno`.
anno.Formal <- synGet("syn3817650", downloadLocation = "./")
anno <- read.delim(anno.Formal@filePath, stringsAsFactors = FALSE)
Let's see what the data objects look like
In [ ]:
# First rows of the annotation table.
head(anno)
# Top-left 5 x 5 corner of the count table.
geneCounts[seq_len(5), seq_len(5)]
Install and load required packages
In [ ]:
# Load the Bioconductor installer script and use it to install biomaRt.
# NOTE(review): biocLite() is the legacy Bioconductor installer; current
# Bioconductor releases install packages with BiocManager::install() --
# confirm which one matches the R version in use.
source("http://bioconductor.org/biocLite.R")
biocLite("biomaRt")
# Attach biomaRt so useMart() and getBM() are available below.
library(biomaRt)
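This function assumes that the Ensembl IDs are the row names of the data matrix. Rows whose IDs are not returned by the Ensembl query are dropped. When several rows map to the same gene symbol, only the row with the highest total count is kept and the others are removed.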
In [ ]:
#' Replace Ensembl gene IDs in row names with gene symbols
#'
#' Queries the Ensembl BioMart (`hsapiens_gene_ensembl` dataset) for the
#' `external_gene_name` of every row name of `data`. Rows whose ID is not
#' returned by the query are dropped. Rows are then sorted by decreasing row
#' sum, and when several rows share a gene symbol only the first (largest row
#' sum) is kept.
#'
#' @param data A numeric data frame or matrix whose row names are Ensembl
#'   gene IDs.
#' @return `data` restricted to the mapped, de-duplicated rows, ordered by
#'   decreasing row sum, with gene symbols as row names.
ensembl_id_to_gene_symbol <- function(data) {
  ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
  genes <- getBM(
    attributes = c("ensembl_gene_id", "external_gene_name"),
    filters = "ensembl_gene_id",
    values = rownames(data),
    mart = ensembl
  )

  # drop = FALSE throughout: without it a single-column input collapses to a
  # plain vector on the first subset and rowSums() then fails.
  data <- data[rownames(data) %in% genes[, 1], , drop = FALSE]

  # Sort by total count so that duplicated() below keeps the row with the
  # most counts for each symbol.
  data <- data[order(rowSums(data), decreasing = TRUE), , drop = FALSE]

  # Align the mapping table to the sorted data (first match per Ensembl ID).
  genes <- genes[match(rownames(data), genes[, 1]), , drop = FALSE]

  # NOTE(review): genes without a symbol presumably come back with an empty
  # name and would all collapse into one row here -- confirm whether such
  # rows should be excluded instead.
  dupl <- duplicated(genes[, 2])
  data <- data[!dupl, , drop = FALSE]
  genes <- genes[!dupl, , drop = FALSE]

  rownames(data) <- genes[, 2]
  data
}
Move the Ensembl IDs from the first column of geneCounts into its row names.
In [ ]:
# Use the first column (Ensembl IDs) as row names, then remove that column.
rownames(geneCounts) <- geneCounts[, 1]
# drop = FALSE keeps a data frame (and its row names) even if only one
# data column remains after removing the ID column.
geneCounts <- geneCounts[, -1, drop = FALSE]
geneCounts[1:5, 1:5]
Now convert ensembl IDs to symbols
In [ ]:
# Swap the Ensembl-ID row names for gene symbols, then preview the
# top-left 5 x 5 corner of the result.
geneCounts <- ensembl_id_to_gene_symbol(data = geneCounts)
geneCounts[seq_len(5), seq_len(5)]