Pull the probes common to all methylation arrays and write them to a new store.
This allows efficient batch selection of probes across tissues.


In [3]:
import pandas as pd

In [2]:
# Open the HDF5 store that holds the methylation tables.
# METH_STORE is not assigned in the cells shown here; it must already exist in the kernel.
# mode='a' is the pandas default, spelled out so the open mode is visible to the reader.
store = pd.HDFStore(METH_STORE, mode='a')

In [11]:
# Collect the row index of every table in the store, keyed by its HDF5 key.
# '/matched_tn' and '/codes' are skipped (presumably they are not probe-level
# tables -- confirm against the notebook that built the store).
idx = {}
for c in store.keys():
    if c in ['/matched_tn', '/codes']:
        continue
    idx[c] = store[c].index

# Labels present in every collected index.
# set.intersection(*sets) is used instead of a bare `reduce`, which is not a
# builtin on Python 3 and was never imported; the guard keeps the cell from
# raising when no table was collected.
probe_sets = [set(table_index) for table_index in idx.values()]
idx_common = set.intersection(*probe_sets) if probe_sets else set()
len(idx_common)


Out[11]:
395883

In [17]:
# Write every table (except the skipped keys) to METH_STORE_MATCHED under its
# original key, keeping only the rows whose labels are in idx_common.
#
# The selector is a sorted list rather than the raw set: `.ix` has been removed
# from pandas, `.loc` does not accept a set, and sorting gives every output
# table the same reproducible row order. It is built once, outside the loop.
common_probes = sorted(idx_common)
for c in store.keys():
    if c in ['/matched_tn', '/codes']:
        continue
    df = store[c]
    # `key` is passed by keyword; newer pandas no longer accepts it positionally.
    df.loc[common_probes].to_hdf(METH_STORE_MATCHED, key=c, format='t',
                                 append=False, complevel=9)