Read PRC2 Probes


In [2]:
import NotebookImport
from DX_Imports import *

In [3]:
import pandas as pd
import os as os

In [4]:
# Progress message; the parenthesised form prints the same text on Python 2
# and is also valid syntax on Python 3.
print('Reading in PRC2 probes')


Reading in PRC2 probes

In [5]:
# Directory containing the PRC2 binding-site files. Its contents are listed
# and read by the mapping loop, and the mapped results are written back into
# it. The trailing slash means a file name can be appended to this string
# directly to form a valid path.
path = '/cellar/users/agross/Data/GeneSets/PRC2_Binding/'

In [6]:
# Build `r`: one column per binding-site file found in `path`, one row per
# methylation probe, holding the per-probe total produced by the groupby/sum
# below (non-zero when the probe lies inside at least one region of that file).
r = {}
# sorted() makes the processing order independent of the filesystem.
for f in sorted(os.listdir(path)):
    # The results table is saved into this same directory by a later cell;
    # skip it so that re-running the notebook does not parse it as an extra
    # binding-site dataset.
    if f == 'mapped_to_methylation_probes.csv':
        continue

    # Headerless, tab-delimited region table (tab is read_table's default
    # separator); the six column names are assigned here.
    df = pd.read_table(os.path.join(path, f), header=None,
                       names=['chr','start','end','name','score','sig'])
    # Drop the 'chr' prefix; presumably this matches the naming used in
    # probe_annotations.Chromosome -- TODO confirm.
    df['chr'] = df['chr'].str.replace('chr','')

    res = {}
    for c in df['chr'].unique():
        # Probes annotated to chromosome c. NOTE(review): probe_annotations is
        # not defined in this notebook; presumably it comes from DX_Imports.
        df_c = probe_annotations[probe_annotations.Chromosome == c]
        coord = df_c.Genomic_Coordinate
        # For one region p: boolean mask over the probes whose coordinate
        # falls within [p.start, p.end], both ends inclusive.
        test_p = lambda p: (coord >= p.start) & (coord <= p.end)
        # Regions x probes boolean table, collapsed per probe to "inside at
        # least one region of this file on chromosome c".
        res[c] = df[df.chr == c].apply(test_p, axis=1).any()
    # Stack the per-chromosome results and total them per probe (level 1 of
    # the resulting (chromosome, probe) index).
    res = pd.concat(res).groupby(level=1).sum()
    r[f] = res
# Align the per-file results side by side on the probe index. `axis` is passed
# by keyword because recent pandas releases no longer accept it positionally.
r = pd.concat(r, axis=1)

In [8]:
# Save the probes x files table next to the input files it was derived from.
r.to_csv(os.path.join(path, 'mapped_to_methylation_probes.csv'))

A probe is annotated as a PRC2 binding site if it falls within a binding region in at least 3 of the 5 datasets.


In [7]:
# Row-wise total across the per-file columns of `r`; a probe is flagged when
# that total is greater than 2.
prc2_probes = r.sum(axis=1).gt(2)