In [2]:
import NotebookImport
from DX_Imports import *
In [3]:
import pandas as pd
import os as os
In [4]:
# Progress message. The parenthesized single-argument form prints the same
# text under Python 2 and is also valid syntax under Python 3.
print('Reading in PRC2 probes')
In [5]:
# Directory holding the PRC2 binding-site files; every entry in it is read as
# one dataset by the loading loop.
# Defaults to the original cluster location; set the PRC2_BINDING_DIR
# environment variable to run the notebook against a different copy.
# Joining with '' guarantees a trailing separator, which the
# `path + filename` concatenations elsewhere in the notebook rely on.
path = os.path.join(
    os.environ.get('PRC2_BINDING_DIR',
                   '/cellar/users/agross/Data/GeneSets/PRC2_Binding/'),
    '')
In [6]:
# Map every binding-site file found in `path` onto the methylation probes.
# `r` ends up as a table with one column per input file; each value is the
# per-probe sum of "coordinate lies inside at least one (start, end) interval
# of that file on the same chromosome" flags.
r = {}
# Sorted so the processing order does not depend on the filesystem listing.
for f in sorted(os.listdir(path)):
    full_path = os.path.join(path, f)
    # The notebook saves its own result into `path`; without this guard a
    # second run would parse that CSV as if it were another binding dataset.
    # Sub-directories are skipped too, since they cannot be read as tables.
    if f == 'mapped_to_methylation_probes.csv' or not os.path.isfile(full_path):
        continue
    df = pd.read_table(full_path, header=None,
                       names=['chr', 'start', 'end', 'name', 'score', 'sig'])
    # Remove the 'chr' text so labels can be compared with
    # probe_annotations.Chromosome (presumably bare labels such as '1' or
    # 'X' -- TODO confirm against probe_annotations).
    df['chr'] = df['chr'].str.replace('chr', '')
    res = {}
    for c in df['chr'].unique():
        df_c = probe_annotations[probe_annotations.Chromosome == c]
        coord = df_c.Genomic_Coordinate
        # For one interval row: boolean vector over this chromosome's probes.
        test_p = lambda p: (coord >= p.start) & (coord <= p.end)
        # Rows are intervals, columns are probes; any() collapses the rows
        # into a single flag per probe.
        res[c] = df[df.chr == c].apply(test_p, axis=1).any()
    # Level 0 of the concatenated index is the chromosome key and level 1 is
    # the probe_annotations index, so grouping on level 1 gives one value per
    # probe.
    res = pd.concat(res).groupby(level=1).sum()
    r[f] = res
r = pd.concat(r, axis=1)
In [8]:
# Save the probe-by-dataset table as CSV.
# NOTE(review): the target is inside `path`, the same directory the loading
# loop enumerates with os.listdir, so that loop must not treat this file as
# an input dataset.
mapped_csv = path + 'mapped_to_methylation_probes.csv'
r.to_csv(mapped_csv)
I'm using a cutoff of at least 3 of the 5 datasets to annotate a probe as a binding site.
In [7]:
# Row-wise total across the dataset columns of `r`, then a boolean flag for
# rows whose total is strictly greater than 2.
datasets_hit = r.sum(axis=1)
prc2_probes = datasets_hit.gt(2)