Methylation Normalization Protocol

require(minfi)

Save Hannum Data

# Hannum cohort: read the IDAT files listed in the sample sheet under baseDir,
# quantile-normalize them, and export beta values and estimated cell counts.
baseDir <- '/cellar/users/agross/Data/Hannum_Methylation/idats/'
targets <- read.450k.sheet(baseDir)
rgSet <- read.450k.exp(base = baseDir, targets = targets)

## detectionP(rgSet, type = "m+u")
Mset.quantile <- preprocessQuantile(rgSet, removeBadSamples = TRUE)
beta <- getBeta(Mset.quantile)
# NOTE(review): the original output path was cut off after
# ".../Methlation/Hannum", leaving an unterminated string literal that
# swallowed the statements that follow it. The file name used here
# ("beta_quantile.csv") is a placeholder chosen to sit beside
# cell_counts_2.csv -- confirm the intended name before running.
write.csv(beta, file = '/cellar/users/agross/TCGA_Code/Methlation/Hannum/beta_quantile.csv')
# Cell counts are estimated from the raw rgSet, not from the normalized set.
cellCounts <- estimateCellCounts(rgSet, meanPlot = FALSE)
write.csv(cellCounts, file = '/cellar/users/agross/TCGA_Code/Methlation/Hannum/cell_counts_2.csv')

Read in HIV Data

# HIV (UCSD) cohort: repoint baseDir at the raw-data folder, parse the sample
# sheet found there, and read the arrays it lists into rgSet2.
baseDir <- "/cellar/users/agross/TCGA_Code/Methlation/UCSD_Methylation/Raw_Data/AllFiles/"
targets2 <- read.450k.sheet(base = baseDir)
rgSet2 <- read.450k.exp(targets = targets2, base = baseDir)

Combine Hannum and HIV

# Minimal phenotype table spanning both studies: one row per sample, with
# Hannum samples labelled "user" and HIV samples labelled "reference".
newpd <- data.frame(
  sampleNames = c(sampleNames(rgSet), sampleNames(rgSet2)),
  studyIndex = rep(
    c("user", "reference"),
    times = c(ncol(rgSet), ncol(rgSet2))
  ),
  stringsAsFactors = FALSE
)

# Keep the original phenotype tables; they are overwritten just below.
oldpd1 <- pData(rgSet)
oldpd2 <- pData(rgSet2)

# Both inputs receive the same full table before being combined.
# NOTE(review): presumably this is so combine() sees identical phenotype data
# on both sides -- confirm against the Biobase version in use.
pData(rgSet) <- newpd
pData(rgSet2) <- newpd
rgSetC <- combine(rgSet, rgSet2)
pData(rgSetC) <- newpd

Add EPIC Data into the Mix

# EPIC cohort: read every array found under baseDir (no sample sheet is used).
baseDir <- "/cellar/users/agross/Data/Methylation_Controls/EPIC_ITALY"
rgSet3 <- read.450k.exp(base = baseDir)
oldpd3 <- pData(rgSet3)

# Three-study phenotype table: names for the first two studies come from the
# phenotype tables saved earlier, names for the third from rgSet3 itself.
# NOTE(review): column 1 is taken from oldpd1 but column 2 from oldpd2 --
# verify that both really hold the sample names in their respective sheets.
newpd1 <- data.frame(
  sampleNames = c(oldpd1[[1]], oldpd2[[2]], sampleNames(rgSet3)),
  studyIndex = rep(
    c("s1", "s2", "s3"),
    times = c(nrow(oldpd1), nrow(oldpd2), ncol(rgSet3))
  ),
  stringsAsFactors = FALSE
)

# Every set, including the earlier two-study combination, gets the same table
# before the final combine.
pData(rgSet) <- newpd1
pData(rgSet2) <- newpd1
pData(rgSet3) <- newpd1
pData(rgSetC) <- newpd1

rgSetD <- combine(rgSetC, rgSet3)

Run Quantile Normalization

# Quantile-normalize the three-study set, then pull out the beta values and
# the per-sample annotation of the normalized object.
Mset.quantile <- preprocessQuantile(rgSetD, removeBadSamples = TRUE)
beta <- getBeta(Mset.quantile)
df <- colData(Mset.quantile)

# Export both tables as CSV.
write.csv(beta, file = "/cellar/users/agross/TCGA_Code/Methlation/beta_combined_quantile.csv")
write.csv(df, file = "/cellar/users/agross/TCGA_Code/Methlation/beta_combined_quantile_labels.csv")

Run estimateCellCounts

# Estimate cell-type composition for the combined three-study set.
# NOTE(review): the Slide self-assignment below is a workaround the original
# author left in place without explanation; what it changes inside
# estimateCellCounts() is not visible from this script -- confirm before
# removing it.
rgSetD$Slide <- rgSetD$Slide # workaround; original note: "bug of some sort"
# The result is only bound to cell_counts; this section does not write it out.
cell_counts = estimateCellCounts(rgSetD)

In [ ]: