In [1]:
%run ../../shared_setup.ipynb
In [2]:
def tabulate(f):
    """Decorator that turns a row-producing function into a table class.

    The decorated name becomes a subclass of ``etl.Table``. Calling it with
    arguments does not produce any rows; it only records the arguments.
    Every iteration over the resulting object calls ``f`` again with those
    arguments, so the table can be iterated more than once.

    Parameters
    ----------
    f : callable
        Function returning an iterable of rows (a generator function, or a
        function returning e.g. a list of tuples).

    Returns
    -------
    type
        The ``Tabulated`` class wrapping ``f``.
    """

    class Tabulated(etl.Table):
        """Table whose rows come from calling the wrapped function."""

        def __init__(self, *args, **kwargs):
            # Only remember the call arguments; f is not invoked here.
            self.args = args
            self.kwargs = kwargs

        def __iter__(self):
            # __iter__ must return an iterator. iter() is a no-op for
            # generators and makes plain iterables (lists, tuples) work too.
            return iter(f(*self.args, **self.kwargs))

    return Tabulated
In [3]:
@tabulate
def tabulate_core_windows(window_size):
    """Yield fixed-size windows tiling the rows of ``tbl_regions_1b`` whose
    ``region_type`` is ``'Core'``.

    The first row is the header ``('chrom', 'start', 'stop')``. Each later
    row is ``(region_chrom, start, start + window_size - 1)``, with ``start``
    stepping from ``region_start`` up to (not including) ``region_stop`` in
    increments of ``window_size``.

    NOTE(review): the stop of the last window in a region is not clipped to
    ``region_stop``, so it can extend past the region -- confirm intended.
    """
    header = ('chrom', 'start', 'stop')
    yield header

    core_regions = tbl_regions_1b.eq('region_type', 'Core')
    for region in core_regions.records():
        window_starts = range(region.region_start, region.region_stop, window_size)
        for window_start in window_starts:
            window_stop = window_start + window_size - 1
            yield region.region_chrom, window_start, window_stop
In [4]:
# Load the CO events table from the pickle under PUBLIC_DIR and convert the
# 'chrom' field to str by decoding it as ASCII.
# NOTE(review): str(v, 'ascii') raises TypeError when v is already a str, so
# this presumably relies on the pickle storing chrom as bytes -- confirm.
# NOTE(review): unpickling can execute arbitrary code; only load this file
# from a trusted location.
tbl_co = (
etl
.frompickle(os.path.join(PUBLIC_DIR, 'tbl_co.pickle'))
.convert('chrom', lambda v: str(v, 'ascii'))
)
display_with_nrows(tbl_co, caption='CO events')
In [5]:
# Window table with window_size=5000 (presumably base pairs -- confirm).
# Construction only stores the argument; rows are generated on iteration.
# NOTE(review): 5000 is a magic number; consider a named constant near the
# top of the notebook.
tbl_windows = tabulate_core_windows(5000)
tbl_windows
Out[5]:
In [10]:
# count COs in windows
# Pipeline: interval-left-join windows to CO events, collapse back to one row
# per window with a Counter of the 'cross' values, then expose one count
# field per cross.
tbl_windows_co = (
tbl_windows
# Join on chrom, matching each window [start, stop] against each event's
# [co_pos_min, co_pos_max]; include_stop=True is passed so stop coordinates
# are treated as inclusive (per the petl interval-join option).
# NOTE(review): an event whose interval overlaps two windows would presumably
# be matched to, and counted in, both -- confirm this is intended.
.intervalleftjoin(tbl_co, lkey='chrom', lstart='start', lstop='stop',
rkey='chrom', rstart='co_pos_min', rstop='co_pos_max',
include_stop=True)
# NOTE(review): drops the field at positional index 4, which depends on the
# column order of tbl_co; selecting by field name would be more robust.
.cutout(4)
# One row per window; None values of 'cross' are skipped (presumably the
# windows the left join left unmatched), so those windows get an empty Counter.
.aggregate(key=('chrom', 'start', 'stop'),
aggregation=lambda vals: collections.Counter([v for v in vals if v is not None]),
value='cross')
.rename('value', 'co_count')
# Counter returns 0 for a missing key, so a cross with no events in a window
# yields a count of 0 rather than raising KeyError.
.addfield('co_count_3d7_hb3', lambda row: row.co_count['3d7_hb3'])
.addfield('co_count_hb3_dd2', lambda row: row.co_count['hb3_dd2'])
.addfield('co_count_7g8_gb4', lambda row: row.co_count['7g8_gb4'])
)
tbl_windows_co
Out[10]:
In [11]:
# Frequency of each per-window CO count value for the 3d7_hb3 field.
tbl_windows_co.valuecounts('co_count_3d7_hb3').displayall()
In [12]:
# Frequency of each per-window CO count value for the hb3_dd2 field.
tbl_windows_co.valuecounts('co_count_hb3_dd2').displayall()
In [13]:
# Frequency of each per-window CO count value for the 7g8_gb4 field.
tbl_windows_co.valuecounts('co_count_7g8_gb4').displayall()
In [16]:
# Hotspot windows: windows in which at least one of the three per-cross CO
# counts is 2 or more. 'n_hot' records how many of the three counts reach
# that threshold.
tbl_hotspots = (
    tbl_windows_co
    .select(lambda win: any(
        n_co >= 2
        for n_co in (win.co_count_3d7_hb3, win.co_count_hb3_dd2, win.co_count_7g8_gb4)
    ))
    .addfield('n_hot', lambda win: len([
        n_co
        for n_co in (win.co_count_3d7_hb3, win.co_count_hb3_dd2, win.co_count_7g8_gb4)
        if n_co >= 2
    ]))
)
tbl_hotspots
Out[16]:
In [17]:
# Frequency of each n_hot value across the selected hotspot windows.
tbl_hotspots.valuecounts('n_hot')
Out[17]:
In [18]:
# Total number of hotspot windows.
tbl_hotspots.nrows()
Out[18]:
In [ ]: