In [16]:
import NotebookImport
from Imports import *
The 450k annotation is only available as an R package, so it has to be loaded through rpy2 and then converted to pandas.
In [1]:
%load_ext rpy2.ipython
In [2]:
import pandas as pd
from pandas.rpy.common import convert_robj
import rpy2.robjects as robjects
from pandas.rpy.common import convert_to_r_dataframe, convert_robj
In [3]:
%%R
# Load the Illumina 450k annotation package. library() is used instead of
# require() so that a missing package stops the cell with a clear error
# rather than a warning followed by an "object not found" failure below.
library('IlluminaHumanMethylation450kanno.ilmn12.hg19')
ann = IlluminaHumanMethylation450kanno.ilmn12.hg19
# The @data slot is indexed by table name ('Islands.UCSC', 'Locations',
# 'Other', 'SNPs.Illumina') from the Python side of this notebook.
data = ann@data
In [4]:
def _annotation_table(slot):
    """Return one named table of the R-side `data` object as a pandas object.

    `slot` is the table name inside `data` (e.g. 'Islands.UCSC'); the table is
    wrapped in an R data.frame before being converted with `convert_robj`.
    """
    return convert_robj(robjects.r("data.frame(data[['%s']])" % slot))


islands = _annotation_table('Islands.UCSC')
locations = _annotation_table('Locations')
other = _annotation_table('Other')
# First two columns are probe sequences which take up a lot of space.
# .iloc makes the positional slice explicit; .ix guesses between labels and
# positions and has been removed from newer pandas releases.
other = other.iloc[:, 2:]
snps = _annotation_table('SNPs.Illumina')
In [5]:
# Probe annotation table kept as a CSV on disk; its first column is used
# as the index of the resulting frame.
PATH = './Data/methylation450_annotations.csv'
probe_annotations = pd.read_csv(filepath_or_buffer=PATH, index_col=0)
In [7]:
# Open the HDF5 store and write every annotation frame with store.append,
# building a full index (optlevel 9) on each table right after it is written.
store = pd.HDFStore(DATA_STORE)
annotation_tables = [
    ('islands', islands),
    ('locations', locations),
    ('other', other),
    ('snps', snps),
    ('probe_annotations', probe_annotations),
]
for table_key, table_frame in annotation_tables:
    store.append(table_key, table_frame)
    store.create_table_index(table_key, optlevel=9, kind='full')
In [11]:
def ti(s):
    """Return the index labels at which the boolean Series `s` is True."""
    return s[s].index


def top_gene(s):
    """Return the most frequent Gene_Symbol value in the frame `s`."""
    return s.Gene_Symbol.value_counts().index[0]


# Islands: sort, drop rows with missing values, keep island names that occur
# on more than 7 rows, then discard the empty-string name.
isl = islands.sort(['Islands_Name', 'Relation_to_Island']).dropna()
island_counts = isl.Islands_Name.value_counts()
isl = isl[isl.Islands_Name.isin(ti(island_counts > 7))]
isl = isl[isl.Islands_Name != '']

# Regulatory features: same "more than 7 occurrences" filter.
ot = other.Regulatory_Feature_Name
feature_counts = ot.value_counts()
ot = ot[ot.isin(ti(feature_counts > 7))]

# Put island rows next to the probe annotations (aligned on the index) and
# keep only rows that have both a gene symbol and an island name.
gb = pd.concat([isl, probe_annotations], axis=1)
has_gene = gb.Gene_Symbol.notnull()
has_island = gb.Islands_Name.notnull()
gb = gb[has_gene & has_island]
g2 = gb.sort('Islands_Name')

# One gene per island: the symbol that appears most often among its rows.
island_to_gene = g2.groupby('Islands_Name').apply(top_gene)
probe_to_island = isl

store['probe_to_island'] = probe_to_island
store['island_to_gene'] = island_to_gene
In [8]:
%%R
# minfi supplies preprocessQuantile()/getBeta(), which this notebook calls
# on the reference set, so load it with library() to stop immediately if it
# is not installed.
library(minfi)
# NOTE(review): nothing visible in this notebook calls minfiData or doParallel
# directly; they are left on require() so their absence only warns.
require(minfiData)
require(doParallel)
referencePkg <- "FlowSorted.Blood.450k"
# library() rather than require(): a missing reference package becomes a hard
# error here instead of a warning followed by a failing get() below.
library(referencePkg, character.only = TRUE)
# Load the dataset named after the package, then fetch it by name.
data(list = referencePkg)
referenceRGset <- get(referencePkg)
In [9]:
%%R
# Run preprocessQuantile on the reference set (removeBadSamples enabled),
# then pull the beta values out of the result with getBeta.
Mset.quantile <- preprocessQuantile(referenceRGset, removeBadSamples = TRUE)
beta <- getBeta(Mset.quantile)
In [10]:
# Wrap the R-side `beta` object in a data.frame, then convert it to pandas.
# NOTE(review): R's data.frame() may rewrite column names (check.names is TRUE
# by default) - confirm the sample IDs come through unchanged.
beta_r = robjects.r("data.frame(beta)")
beta = convert_robj(beta_r)
In [11]:
# CellType values of the reference set, indexed by the columns of `beta`.
# NOTE(review): assumes referenceRGset$CellType has the same length and order
# as beta's columns; preprocessQuantile ran with removeBadSamples = TRUE, so
# confirm no sample was dropped.
labels = convert_robj(robjects.r('referenceRGset$CellType'))
label_map = pd.Series(data=labels, index=beta.columns)
In [12]:
# Save the column -> cell-type mapping in the HDF5 store.
store.put('label_map', label_map)
In [13]:
# Save the converted beta frame in the already-open HDF5 store.
store.put('flow_sorted_data', beta)
In [14]:
# Close the HDF5 store that was opened earlier in this notebook.
store.close()