In [2]:
import pandas as pd

BMIQ Normalization

Here we run BMIQ normalization on all of our quantile-normalized data together. I am using the implementation that Steve Horvath provided alongside his recent methylation-age paper. We do this because that paper recommends running quantile normalization and BMIQ in series on the same datasets.


In [1]:
import pandas as pd

import rpy2.robjects as robjects
from pandas.rpy.common import convert_to_r_dataframe
from pandas.rpy.common import convert_robj

from IPython.display import clear_output

Load Horvath normalization source into R namespace.

Read in the cell-composition-adjusted, quantile-normalized beta values and cell counts from the MINFI pipeline.


In [2]:
# Column labels requested when the beta table is read: the ten consecutive
# identifiers 3001 through 3010, as unicode strings.
c = [u'{0}'.format(label) for label in range(3001, 3011)]

In [3]:
# Read only the columns listed in `c` from the 'betas_adj' table.
# NOTE(review): HDFS_DIR is not defined in any cell shown here -- presumably
# it comes from a startup/profile import; confirm before a fresh-kernel run.
betas = pd.read_hdf(
    HDFS_DIR + 'methylation_norm.h5',
    key='betas_adj',
    columns=c
)

In [4]:
# Load the 'Hannum_gold_standard' object from the same HDF5 file; later cells
# use it as the reference passed to the BMIQ calibration.
# NOTE(review): HDFS_DIR is not defined in any cell shown here -- confirm
# where it is set.
gold_standard_ah = pd.read_hdf(
    HDFS_DIR + 'methylation_norm.h5',
    key='Hannum_gold_standard'
)

In [5]:
# Align the rows of `betas` to the gold-standard index.
# `reindex` replaces the `.ix` indexer, which is deprecated and was removed
# in pandas 1.0. Like `.ix`, it returns all-NaN rows for labels that are
# absent from `betas` rather than raising, and the NaN fill that follows
# depends on that.
betas = betas.reindex(gold_standard_ah.index)

In [6]:
# Fill any missing beta value with the gold-standard value for the same row
# label. `fillna` with a Series matches on column labels, hence the
# transpose -> fill -> transpose round trip.
if betas.isnull().values.any():
    betas = betas.T.fillna(gold_standard_ah).T

In [7]:
# Attach WGCNA in the embedded R session, then source the Horvath
# normalization script (presumably the one defining BMIQcalibration, which is
# called further down -- confirm).
# NOTE(review): absolute, user-specific path; consider a configurable base dir.
NORMALIZATION_SCRIPT = "/cellar/users/agross/Data/MethylationAge/Horvath/NORMALIZATION.R"

robjects.r.library('WGCNA')
robjects.r.source(NORMALIZATION_SCRIPT)

# Clear whatever the R calls printed into the cell output.
clear_output()

In [8]:
# Convert the beta frame to an R data frame and transpose it with R's t().
df_r = robjects.r.t(convert_to_r_dataframe(betas))

# Gold-standard values in the same order as the rows of `betas`.
# `reindex` replaces the `.ix` indexer (deprecated, removed in pandas 1.0)
# and keeps its NaN-for-missing-label behaviour.
gs = list(gold_standard_ah.reindex(betas.index))

# R numeric vector handed to the calibration routine.
gs_r = robjects.FloatVector(gs)

In [9]:
# Drop the pandas frame now that its R-side copy (`df_r`) exists -- presumably
# to free memory before the calibration step.
# NOTE(review): after this, cells that read `betas` cannot be re-run without
# reloading it.
del betas

In [10]:
# Run the R-side BMIQcalibration on the transposed betas against the
# gold-standard vector, convert the result back to pandas and transpose it.
# Chaining the calls means `data_n` is only ever bound to the pandas object.
data_n = convert_robj(
    robjects.r.BMIQcalibration(df_r, gs_r)
).T

# Clear whatever the R routine printed into the cell output.
clear_output()

In [11]:
def _restore_column_label(label):
    """Return `label` with every '.' turned into '-' and one leading 'X' removed.

    Presumably this undoes the name mangling applied when the frame went
    through R ('-' becoming '.', and an 'X' prefix on names that start with
    a digit) -- TODO confirm against the original sample labels.
    """
    dashed = label.replace('.', '-')
    if dashed.startswith('X'):
        return dashed[1:]
    return dashed


# Apply both clean-up steps to every column label in a single pass.
data_n.columns = data_n.columns.map(_restore_column_label)

In [14]:
# Open a handle on the `methylation_norm_tmp.h5` HDF5 store.
# NOTE(review): no cell shown here writes `data_n` to this store or closes
# the handle -- confirm where the normalized data is persisted.
store = pd.HDFStore(HDFS_DIR + 'methylation_norm_tmp.h5')

In [13]:
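# Disabled persistence step, kept for reference: it appends `data_n` to the
# 'quant_BMIQ_adj' table of a different store and builds a full table index.
# NOTE(review): running it would rebind `store` to '/data_ssd/methylation_norm.h5'.
#store = pd.HDFStore('/data_ssd/methylation_norm.h5')
#store.append('quant_BMIQ_adj', data_n)
#store.create_table_index('quant_BMIQ_adj', optlevel=9, kind='full')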