chromosome = the chromosome that the loop is located on
x1,x2 = the coordinates of the upstream locus corresponding to the peak pixel (see the Experimental Procedures and VI.a.5.iv of the Extended Experimental Procedures of Rao, Huntley, et al., Cell 2014 for a definition of the peak pixel)
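chromosome = the chromosome that the downstream locus is located on (the second chromosome column of the loop list, `chr2` in the header below)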
y1,y2 = the coordinates of the downstream locus corresponding to the peak pixel (see the Experimental Procedures and VI.a.5.iv of the Extended Experimental Procedures of Rao, Huntley, et al., Cell 2014 for a definition of the peak pixel)
color = the color that the feature will be rendered as if loaded in Juicebox
observed = the raw observed counts at the peak pixel (see the Experimental Procedures and VI.a.5.iv of the Extended Experimental Procedures of Rao, Huntley, et al., Cell 2014 for a definition of the peak pixel)
expected_[bottom_left, donut, horizontal, vertical] = the expected counts calculated using the [bottom_left, donut, horizontal, vertical] filter (see Figure 3 and section VI.a.5.i of the Extended Experimental Procedures of Rao, Huntley, et al., Cell 2014)
fdr_[bottom_left, donut, horizontal, vertical] = the q-value of the loop calculated using the [bottom_left, donut, horizontal, vertical] filter (see VI.a.5.ii of the Extended Experimental Procedures of Rao, Huntley, et al., Cell 2014)
number_collapsed = the number of pixels that were clustered together as part of the loop call (see section VI.a.5.iv of the Extended Experimental Procedures of Rao, Huntley, et al., Cell 2014)
centroid1 = the upstream coordinate of the centroid of the cluster of pixels corresponding to the loop (see section VI.a.5.iv of the Extended Experimental Procedures of Rao, Huntley, et al., Cell 2014)
centroid2 = the downstream coordinate of the centroid of the cluster of pixels corresponding to the loop (see section VI.a.5.iv of the Extended Experimental Procedures of Rao, Huntley, et al., Cell 2014)
radius = the Euclidean distance from the centroid of the cluster of pixels to the farthest pixel in the cluster of pixels (see section VI.a.5.iv of the Extended Experimental Procedures of Rao, Huntley, et al., Cell 2014)
In [1]:
# Column names of a loop list file, in the order the columns appear in each
# tab-separated row. The converters zip this list against the split fields of
# a row to look values up by name.
loop_head = [
    # upstream locus
    'chr1', 'x1', 'x2',
    # downstream locus
    'chr2', 'y1', 'y2',
    # display colour and raw observed count
    'color', 'o',
    # expected counts per filter
    'e_bl', 'e_donut', 'e_h', 'e_v',
    # q-values per filter
    'fdr_bl', 'fdr_donut', 'fdr_h', 'fdr_v',
    # cluster summary
    'number collapsed', 'centroid1', 'centroid2', 'radius',
]
In [2]:
def to_bed12(looplist, out_file):
    """Convert a loop list file to BED12, one two-block feature per loop.

    The first line of ``looplist`` is treated as a header and skipped. Every
    following non-blank line is split on tabs and mapped onto the module-level
    ``loop_head`` column names. Each loop becomes one BED12 record spanning
    ``x1``..``y2`` whose two blocks are the upstream (``x1``..``x2``) and
    downstream (``y1``..``y2``) loci.

    Columns written: chrom, chromStart (x1), chromEnd (y2), name
    (``chr1:x1..x2-chr2:y1-y2``), score (the ``fdr_donut`` text as-is),
    strand (``.``), thickStart (x1), thickEnd (y2), itemRgb (``0,0,0``),
    blockCount (``2``), blockSizes, blockStarts.

    Args:
        looplist: path of the tab-separated loop list to read.
        out_file: path of the BED12 file to create (overwritten if present).

    Raises:
        KeyError: if a data row has too few columns to supply a needed field.
        ValueError: if a coordinate is not an integer.
    """
    # One `with` for both handles so the output is flushed and closed even
    # when a row fails to parse. The input is opened first so a missing loop
    # list does not leave an empty output file behind.
    with open(looplist, "r") as loops, open(out_file, "w") as bed:
        loops.readline()  # header row
        for line in loops:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            d = dict(zip(loop_head, line.replace("\n", "").split("\t")))
            x1, x2 = int(d['x1']), int(d['x2'])
            y1, y2 = int(d['y1']), int(d['y2'])
            name = "{}:{}..{}-{}:{}-{}".format(
                d['chr1'], d['x1'], d['x2'], d['chr2'], d['y1'], d['y2'])
            # Block sizes are the real locus widths and block starts are
            # relative to chromStart, so the last block ends exactly at
            # chromEnd as the BED12 format requires.
            block_sizes = "{},{}".format(x2 - x1, y2 - y1)
            block_starts = "0,{}".format(y1 - x1)
            record = [
                d['chr1'], d['x1'], d['y2'],
                name,
                d['fdr_donut'],
                ".",
                d['x1'], d['y2'],
                "0,0,0",
                "2",
                block_sizes,
                block_starts,
            ]
            bed.write("\t".join(record) + "\n")
In [3]:
def to_bedpe(looplist, out_file):
    """Convert a loop list file to a BEDPE-style file, one line per loop.

    The first line of ``looplist`` is treated as a header and skipped. Every
    following non-blank line is split on tabs and mapped onto the module-level
    ``loop_head`` column names.

    Columns written: chr1, x1, x2, chr2, y1, y2, name
    (``chr1:x1..x2-chr2:y1-y2``), ``fdr_donut``, ``*``, ``*``, ``fdr_bl``,
    ``fdr_h``, ``fdr_v`` and a final ``0,<y2 - x1>`` field.

    Args:
        looplist: path of the tab-separated loop list to read.
        out_file: path of the output file to create (overwritten if present).

    Raises:
        KeyError: if a data row has too few columns to supply a needed field.
        ValueError: if ``x1`` or ``y2`` is not an integer.
    """
    # One `with` for both handles so the output is flushed and closed even
    # when a row fails to parse. The input is opened first so a missing loop
    # list does not leave an empty output file behind.
    with open(looplist, "r") as loops, open(out_file, "w") as bedpe:
        loops.readline()  # header row
        for line in loops:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            d = dict(zip(loop_head, line.replace("\n", "").split("\t")))
            name = "{}:{}..{}-{}:{}-{}".format(
                d['chr1'], d['x1'], d['x2'], d['chr2'], d['y1'], d['y2'])
            record = [
                d['chr1'], d['x1'], d['x2'],
                d['chr2'], d['y1'], d['y2'],
                name,
                d['fdr_donut'],
                "*", "*",
                d['fdr_bl'], d['fdr_h'], d['fdr_v'],
                "0,{}".format(int(d['y2']) - int(d['x1'])),
            ]
            bedpe.write("\t".join(record) + "\n")
In [4]:
def to_hiBrowsein(looplist, out_file):
    """Convert a loop list file to a seven-column tab-separated table.

    The output starts with the header ``chr1 x1 x2 chr2 y1 y2 fdr_bl``
    (tab-separated) followed by one line per loop carrying those fields.
    The first line of ``looplist`` is treated as a header and skipped; every
    following non-blank line is split on tabs and mapped onto the
    module-level ``loop_head`` column names.

    Args:
        looplist: path of the tab-separated loop list to read.
        out_file: path of the output file to create (overwritten if present).

    Raises:
        KeyError: if a data row has too few columns to supply a needed field.
    """
    columns = ['chr1', 'x1', 'x2', 'chr2', 'y1', 'y2', 'fdr_bl']
    # One `with` for both handles so the output is flushed and closed even
    # when a row fails to parse. The input is opened first so a missing loop
    # list does not leave a header-only output file behind.
    with open(looplist, "r") as loops, open(out_file, "w") as hibrowsein:
        hibrowsein.write("\t".join(columns) + "\n")
        loops.readline()  # header row
        for line in loops:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            d = dict(zip(loop_head, line.replace("\n", "").split("\t")))
            # The output header names double as the loop list keys to copy.
            hibrowsein.write("\t".join([d[c] for c in columns]) + "\n")
In [5]:
def to_homer(looplist, out_file):
    """Convert a loop list file to a HOMER-style interaction table.

    The output starts with a 20-column header line followed by one line per
    loop. The first line of ``looplist`` is treated as a header and skipped;
    every following non-blank line is split on tabs and mapped onto the
    module-level ``loop_head`` column names.

    Per loop the columns are filled as follows: the interaction ID is
    ``chr1:x1..x2-chr2:y1-y2``; each peak ID is ``<chr>-<start>``; strands are
    ``.`` and total reads ``0``; Distance is ``y2 - x1``; Interaction Reads,
    Expected Reads and Z-score are ``0``; LogP carries ``fdr_bl``; FDR carries
    ``fdr_donut``; Circos Thickness is ``2``.

    Args:
        looplist: path of the tab-separated loop list to read.
        out_file: path of the output file to create (overwritten if present).

    Raises:
        KeyError: if a data row has too few columns to supply a needed field.
        ValueError: if ``x1`` or ``y2`` is not an integer.
    """
    header = [
        "InteractionID",
        "PeakID(1)", "chr(1)", "start(1)", "end(1)", "strand(1)",
        "Total Reads(1)",
        "PeakID(2)", "chr(2)", "start(2)", "end(2)", "strand(2)",
        "Total Reads(2)",
        "Distance",
        "Interaction Reads", "Expected Reads",
        "Z-score", "LogP", "FDR", "Circos Thickness",
    ]
    # One `with` for both handles so the output is flushed and closed even
    # when a row fails to parse. The input is opened first so a missing loop
    # list does not leave a header-only output file behind.
    with open(looplist, "r") as loops, open(out_file, "w") as homer:
        homer.write("\t".join(header) + "\n")
        loops.readline()  # header row
        for line in loops:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            d = dict(zip(loop_head, line.replace("\n", "").split("\t")))
            record = [
                "{}:{}..{}-{}:{}-{}".format(
                    d['chr1'], d['x1'], d['x2'], d['chr2'], d['y1'], d['y2']),
                # first peak: ID, chr, start, end, strand, total reads
                "{}-{}".format(d['chr1'], d['x1']),
                d['chr1'], d['x1'], d['x2'], ".", "0",
                # second peak: ID, chr, start, end, strand, total reads
                "{}-{}".format(d['chr2'], d['y1']),
                d['chr2'], d['y1'], d['y2'], ".", "0",
                # distance
                str(int(d['y2']) - int(d['x1'])),
                # interaction reads, expected reads, z-score placeholders
                "0", "0", "0",
                # LogP and FDR columns
                d['fdr_bl'], d['fdr_donut'],
                # circos thickness
                "2",
            ]
            homer.write("\t".join(record) + "\n")
In [6]:
# Write the chr1 loop list out in BEDPE form.
# Usage: to_bedpe(<looplist file>, <output file name>)
to_bedpe(
    "/Users/pubudu/Documents/HiC-runs/hic_formatConversion/chr1-looplist.txt",
    '/Users/pubudu/Documents/HiC-runs/hic_formatConversion/chr1-looplist.bedpe2.bed'
)
In [7]:
# Write the chr1 loop list out as a HOMER-style interaction table.
# Usage: to_homer(<looplist file>, <output file name>)
to_homer(
    "/Users/pubudu/Documents/HiC-runs/hic_formatConversion/chr1-looplist.txt",
    '/Users/pubudu/Documents/HiC-runs/hic_formatConversion/chr1-looplist.homer2.bed'
)
In [8]:
# Write the chr1 loop list out as the seven-column hiBrowse input table.
# Usage: to_hiBrowsein(<looplist file>, <output file name>)
to_hiBrowsein(
    "/Users/pubudu/Documents/HiC-runs/hic_formatConversion/chr1-looplist.txt",
    '/Users/pubudu/Documents/HiC-runs/hic_formatConversion/chr1-looplist.hibrowseIn.txt'
)
In [ ]: