Pull the probes common to all methylation arrays and write them to a new store.
This allows probes to be selected efficiently in a single batch across tissues.
In [3]:
import pandas as pd
In [2]:
# Open the source methylation store read-only: the visible cells only read
# from it, and mode='r' fails loudly on a wrong path instead of silently
# creating an empty file (the default mode is append).
# NOTE(review): METH_STORE is not defined in the visible cells; it must be
# set before this cell runs. If a later cell needs to write to this store,
# reopen it in append mode there.
store = pd.HDFStore(METH_STORE, mode='r')
In [11]:
# Collect the row index of every table in the store, skipping the two
# keys that are excluded from the probe matching.
idx = {}
for c in store.keys():
    if c not in ['/matched_tn', '/codes']:
        idx[c] = store[c].index

# Labels present in every collected index. set.intersection(*...) replaces
# the bare `reduce`, which is not a builtin on Python 3 and was never
# imported here. Like the original, this raises TypeError if no table
# was collected.
idx_common = set.intersection(*map(set, idx.values()))
len(idx_common)
Out[11]:
In [17]:
# Write every table, restricted to the shared labels, into the matched store
# under the same key it has in the source store.
#
# The shared labels are turned into a sorted list because:
#   * `.ix` was removed from pandas, and `.loc` is the label-based equivalent;
#   * recent pandas rejects a set as an indexer;
#   * a fixed order gives every output table the same row order.
# NOTE(review): sorting assumes the labels are mutually comparable
# (e.g. all strings) - confirm for these indexes.
probes_common = sorted(idx_common)
for c in store.keys():
    if c in ['/matched_tn', '/codes']:
        continue
    df = store[c]
    # format='t' writes a queryable table; append=False replaces any
    # existing node with the same key in the output file.
    df.loc[probes_common].to_hdf(METH_STORE_MATCHED, key=c, format='t',
                                 append=False, complevel=9)