In [1]:
from __future__ import division
%pylab inline
import glob
import seaborn as sns
sns.set_style('ticks')
sns.set_context('paper')
In [2]:
def iter_palindrome(fn):
with open(fn,'r') as f:
coords = []
seqs = []
name = None
out = False
for line in f:
line = line.rstrip()
if 'Palindromes of' in line:
name = line.split()[2]
elif ':' in line or len(line) == 0:
continue
else:
if '|' in line:
nm = line.count('|')
out = True
continue
else:
line = line.split()
s,e = int(line[0]),int(line[-1])
if s > e:
s,e = e,s
s -= 1
coords.append((s,e))
seqs.append(line[1].upper())
if out:
G = coords[1][0]-coords[0][1]
for (a,b),c in zip(coords,seqs):
yield name,a,b,c,nm/len(c),G
out = False
coords = []
seqs = []
def pal2mat(fn,minstem=5,maxstem=20,mingap=0,maxgap=20,pct_id=0.8,pidscale=False,norm=True):
mat = np.zeros((maxstem-minstem+1,maxgap-mingap+1))
nreads = 0
for _,s,e,seq,pi,G in iter_palindrome(fn):
nreads+=1
L = len(seq)
if (L < minstem or L > maxstem):
continue
if (G < mingap or G > maxgap):
continue
if pi < pct_id:
continue
if not pidscale:
mat[L-minstem,G-mingap] += 1
else:
mat[L-minstem,G-mingap] += pct_id
return mat, nreads
In [9]:
pfxs = ['../data/palindrome/multi_species_sampled_reads/homo_sapiens',
'../data/palindrome/multi_species_sampled_reads/pan_troglodytes',
'../data/palindrome/multi_species_sampled_reads/pan_paniscus',
'../data/palindrome/multi_species_sampled_reads/gorilla_gorilla',
'../data/palindrome/multi_species_sampled_reads/gorilla_berengei',
'../data/palindrome/multi_species_sampled_reads/pongo_abelii',
'../data/palindrome/multi_species_sampled_reads/pongo_pygmaeus',
'../data/palindrome/multi_species_sampled_reads/chlorocebus_aethiops',
'../data/palindrome/multi_species_sampled_reads/chlorocebus_pygerythrus',
'../data/palindrome/multi_species_sampled_reads/chlorocebus_sabaeus',
'../data/palindrome/multi_species_sampled_reads/macaca_mulatta',
'../data/palindrome/multi_species_sampled_reads/macaca_fascicularis',
'../data/palindrome/multi_species_sampled_reads/papio_anubis',
'../data/palindrome/multi_species_sampled_reads/mouse_misat',
'../data/palindrome/multi_species_sampled_reads/equus_caballus',
'../data/palindrome/multi_species_sampled_reads/chicken_sim',
'../data/palindrome/multi_species_sampled_reads/stickleback',
'../data/palindrome/multi_species_sampled_reads/pombe.cen_core.sim_reads',
'../data/palindrome/multi_species_sampled_reads/oryza_sativa',
'../data/palindrome/multi_species_sampled_reads/zea_mays',
'../data/palindrome/multi_species_sampled_reads/arabidopsis_thaliana'
]
In [ ]:
data = []
for pfx in pfxs:
x,xn = pal2mat(pfx+'.txt',minstem=3,maxstem=10,maxgap=20,pct_id =0.8)
y,_ = pal2mat(pfx+'.perm.s0.txt',minstem=3,maxstem=10,maxgap=20,pct_id =0.8)
print pfx
ratio = np.log2(np.sum(x,axis=1)/xn+1) - np.log2(np.sum(y,axis=1)/xn+1)
d = ratio
data.append(d)
imshow(d.reshape(1,(len(d))),vmin=-0.001,vmax=0.25,cmap=cm.bone_r,rasterized=True,interpolation='none')
plt.xticks([])
plt.yticks([])
plt.show()
In [15]:
plt.imshow(np.array(data)[:,0:6],cmap=cm.bone_r,vmin=0.05,vmax=0.25,interpolation='none',rasterized=True)
plt.axhline(6.5,c='black',ls='dotted',lw=1)
plt.axhline(12.5,c='black',ls='dotted',lw=1)
plt.axhline(13.5,c='black',ls='dotted',lw=1)
plt.axhline(14.5,c='black',ls='dotted',lw=1)
plt.axhline(15.5,c='black',ls='dotted',lw=1)
plt.axhline(16.5,c='black',ls='dotted',lw=1)
plt.axhline(17.5,c='black',ls='dotted',lw=1)
plt.axvline(1.5,c='black',ls='dotted',lw=1)
plt.xticks([])
plt.yticks([])
plt.savefig('../figures/dyad_density.heatmap.svg')