In [2]:
import pandas as pd
Here we run BMIQ normalization on all of our quantile-normalized data together, using the implementation that Steve Horvath provided along with his recent methylation-age paper. We do this because that paper recommends running quantile normalization and BMIQ in series on the same datasets.
In [1]:
import pandas as pd
import rpy2.robjects as robjects
from pandas.rpy.common import convert_to_r_dataframe
from pandas.rpy.common import convert_robj
from IPython.display import clear_output
Load Horvath normalization source into R namespace.
Read in cell composition adjusted, quantile-normalized beta values and cell counts from the MINFI pipeline.
In [2]:
# Sample column identifiers '3001' through '3010', as unicode strings.
c = [u'%d' % sample_number for sample_number in range(3001, 3011)]
In [3]:
# Read the 'betas_adj' table, restricted to the sample columns listed in c.
betas = pd.read_hdf(
    HDFS_DIR + 'methylation_norm.h5',
    key='betas_adj',
    columns=c
)
In [4]:
# Read the 'Hannum_gold_standard' object from the same HDF5 file as the betas.
gold_standard_ah = pd.read_hdf(
    HDFS_DIR + 'methylation_norm.h5',
    key='Hannum_gold_standard'
)
In [5]:
# Align betas to the gold standard's row labels and order.
# reindex() replaces the deprecated `.ix` indexer (removed in pandas 1.0);
# labels absent from betas become all-NaN rows instead of raising.
betas = betas.reindex(gold_standard_ah.index)
In [6]:
# Only do the double transpose when at least one value is actually missing.
if betas.isnull().values.any():
    # Transpose so the row labels sit on the columns, fill the gaps from
    # gold_standard_ah (presumably a Series keyed by those same labels --
    # TODO confirm), then transpose back to the original orientation.
    betas = betas.T.fillna(gold_standard_ah).T
In [7]:
# Attach the WGCNA package in the embedded R session.
robjects.r.library('WGCNA')
# Source the normalization script so its functions are available through robjects.r.
# NOTE(review): this is an absolute, machine-specific path.
robjects.r.source("/cellar/users/agross/Data/MethylationAge/Horvath/NORMALIZATION.R")
# Wipe whatever the two calls above printed into the cell output.
clear_output()
In [8]:
# Convert betas to an R data frame and transpose it with R's t().
df_r = robjects.r.t(convert_to_r_dataframe(betas))
# Gold-standard values looked up by betas' row labels, in betas' row order.
# `.loc` replaces the deprecated `.ix` indexer (removed in pandas 1.0).
gs = list(gold_standard_ah.loc[betas.index])
# Wrap the values as an R numeric vector for the calibration call.
gs_r = robjects.FloatVector(gs)
In [9]:
# Drop the Python-side name for betas; its contents were already converted to
# the R object df_r above. Cells that read betas must be re-run from the load
# step after this point.
del betas
In [10]:
# Call R's BMIQcalibration (presumably defined by the sourced NORMALIZATION.R
# script -- confirm) on the transposed data and the gold-standard vector,
# convert the result back to a pandas object, and transpose it.
data_n = convert_robj(robjects.r.BMIQcalibration(df_r, gs_r)).T
# Clear the cell's output area once the call has finished.
clear_output()
In [11]:
def _restore_sample_id(name):
    """Clean up one column label that came back from R.

    Every '.' is turned back into '-'. A leading 'X' is removed only when it
    is immediately followed by a digit, which is the form R's make.names()
    produces for names that start with a digit (presumably where the prefix
    comes from -- confirm). Labels that start with 'X' followed by a
    non-digit are left untouched instead of losing their first character.
    """
    name = name.replace('.', '-')
    if name[:1] == 'X' and name[1:2].isdigit():
        return name[1:]
    return name


# Single pass over the column labels; re-running this cell is a no-op for
# labels that have already been restored to a digit-leading form.
data_n.columns = data_n.columns.map(_restore_sample_id)
In [14]:
# Open the temporary HDF5 store under HDFS_DIR.
# NOTE(review): no visible cell writes data_n into this store or closes it,
# and the only write code below is commented out and targets a different
# file -- confirm how the normalized data is meant to be persisted.
store = pd.HDFStore(HDFS_DIR + 'methylation_norm_tmp.h5')
In [13]:
#store = pd.HDFStore('/data_ssd/methylation_norm.h5')
#store.append('quant_BMIQ_adj', data_n)
#store.create_table_index('quant_BMIQ_adj', optlevel=9, kind='full')