Got to do this in rpy2
In [ ]:
HDFS_DIR = '/cellar/users/agross/Data/SSD/'
In [1]:
import pandas as pd
In [38]:
from pandas.rpy.common import convert_robj
import rpy2.robjects as robjects
from pandas.rpy.common import convert_to_r_dataframe, convert_robj
In [3]:
%load_ext rpy2.ipython
In [4]:
%%R
require('IlluminaHumanMethylation450kanno.ilmn12.hg19')
ann = IlluminaHumanMethylation450kanno.ilmn12.hg19
data = ann@data
In [8]:
islands = convert_robj(robjects.r("data.frame(data[['Islands.UCSC']])"))
locations = convert_robj(robjects.r("data.frame(data[['Locations']])"))
other = convert_robj(robjects.r("data.frame(data[['Other']])"))
other = other.ix[:, 2:] #first two columns are probe sequences which take up a lot of space
snps = convert_robj(robjects.r("data.frame(data[['SNPs.Illumina']])"))
In [9]:
PATH = '/cellar/users/agross/Data/GeneSets/methylation450_annotations.csv'
probe_annotations = pd.read_csv(PATH, index_col=0)
In [26]:
store = pd.HDFStore(HDFS_DIR + 'methylation_annotation.h5')
store.append('islands', islands)
store.create_table_index('islands', optlevel=9, kind='full')
store.append('locations', locations)
store.create_table_index('locations', optlevel=9, kind='full')
store.append('other', other)
store.create_table_index('other', optlevel=9, kind='full')
store.append('snps', snps)
store.create_table_index('snps', optlevel=9, kind='full')
store.append('probe_annotations', probe_annotations)
store.create_table_index('probe_annotations', optlevel=9, kind='full')
In [27]:
%%R
require(minfi)
require(minfiData)
require(doParallel)
referencePkg <- "FlowSorted.Blood.450k"
require(referencePkg, character.only = TRUE)
data(list = referencePkg)
referenceRGset <- get(referencePkg)
In [28]:
%%R
Mset.quantile = preprocessQuantile(referenceRGset, removeBadSamples = TRUE)
beta = getBeta(Mset.quantile)
In [29]:
beta = convert_robj(robjects.r("data.frame(beta)"))
In [30]:
labels = convert_robj(robjects.r('referenceRGset$CellType'))
label_map = pd.Series(labels, beta.columns)
In [31]:
store['label_map'] = label_map
In [ ]:
beta.to_hdf(store,'flow_sorted_data')
In [33]:
store.close()
In [34]:
robjects.r.library('WGCNA');
robjects.r.source("/cellar/users/agross/Data/MethylationAge/Horvath/NORMALIZATION.R");
In [35]:
gold_standard = pd.read_csv('/cellar/users/agross/Data/MethylationAge/Horvath/probeAnnotation21kdatMethUsed.csv',
index_col=0)
In [39]:
def normalize_meth(df):
df = df.ix[gold_standard.index]
df = df.T.fillna(gold_standard.goldstandard2).T
df_r = robjects.r.t(convert_to_r_dataframe(df))
gs = list(gold_standard.goldstandard2.ix[df.index])
gs_r = robjects.FloatVector(gs)
data_n_r = robjects.r.BMIQcalibration(df_r, gs_r)
data_n = convert_robj(data_n_r).T
return data_n
In [40]:
data_n = normalize_meth(beta)
In [42]:
data_n.to_hdf(HDFS_DIR, 'methylation_annotation.h5',
'flow_sorted_data_horvath_norm')