In [1]:
from __future__ import division
%pylab inline
import seaborn as sns
# Global seaborn styling, set once here: the 'ticks' axes style and the
# 'paper' plotting context.
sns.set_style('ticks')
sns.set_context('paper')
# Primate species keys in plotting order. The plotting cells look each key up
# in the dict returned by parse_data, so the spellings must match the first
# column of the count files exactly.
species = [
    'homo_sapiens',
    'pan_troglodytes',
    'pan_paniscus',
    'gorilla_gorilla',
    'gorilla_berengei',
    'pongo_abelii',
    'pongo_pygmaeus',
    'chlorocebus_sabaeus',
    'chlorocebus_pygerythrus',
    'chlorocebus_aethiops',
    'macaca_mulatta',
    'macaca_fascicularis',
    'papio_anubis',
]

# One bar colour per species, in the same order as `species`: red for the
# first 7 entries (Homo, Pan, Gorilla, Pongo) and blue for the remaining 6
# (Chlorocebus, Macaca, Papio).
cols = 7*['#e41a1c'] + 6*['#377eb8']
def parse_data(fn):
    """Read a whitespace-delimited table and return its ratios keyed by name.

    Expected line format: ``species count total ratio``. Only the first
    field (used as the key) and the last field (converted to float) are
    read; any fields in between are ignored.

    Lines that contain a '#' anywhere are skipped, as are blank or
    whitespace-only lines. If a name occurs more than once, the last
    occurrence wins.

    Parameters
    ----------
    fn : str
        Path of the file to read.

    Returns
    -------
    dict
        Maps each name to its ratio as a float.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened.
    ValueError
        If the last field of a data line cannot be converted to float.
    """
    d = {}
    with open(fn,'r') as f:
        for line in f:
            if '#' in line:
                continue
            fields = line.split()
            # A blank line (e.g. a stray newline at the end of the file) has
            # no fields; skip it rather than fail on fields[0].
            if not fields:
                continue
            d[fields[0]] = float(fields[-1])
    return d
In [58]:
# CENP-B box abundance in unfiltered WGS reads: one horizontal bar per species
cb_total = parse_data('../data/cenp_b_ext.counts.txt')

plt.figure(figsize=(1.5,5.05))
x = np.arange(len(species))
# Walk the species list back to front: barh draws position 0 at the bottom,
# so the first species ends up as the top bar.
y1 = np.array([cb_total[sp] for sp in reversed(species)])
barlist = plt.barh(x, y1, facecolor='black', height=0.4)

# No species labels on the y axis; x ticks are relabelled in units of 10^-2.
plt.yticks([])
plt.xticks([0,0.005,0.01], ['0','0.5','1'], size=10)
plt.xlabel('Fraction of\nWGS reads ($10^{-2}$)', size=10)
plt.title('CENP-B boxes', size=12)

# Give each bar its per-species colour (colours reversed to match y1).
for i, c in enumerate(reversed(cols)):
    bar = barlist[i]
    bar.set_facecolor(c)
    bar.set_edgecolor('black')
    bar.set_linewidth(2)

sns.despine(left=True)
plt.savefig('../figures/primate_cenpb.genome.abund.svg')
In [43]:
# CENP-B box abundance among alphoid-filtered WGS reads: one bar per species
cb_alphoid = parse_data('../data/cenp_b_ext.counts.alphoid.txt')
plt.figure(figsize=(1.5,10))
x = np.arange(len(species))
# species (and cols below) are reversed so the first species is the top bar
y1 = np.array([cb_alphoid[s] for s in species[::-1]])
barlist = plt.barh(x,y1,facecolor='black',height=0.4)
plt.yticks([])
# Tick positions, labels (in units of 10^-2) and font size are all set by
# this single call.
plt.xticks([0,0.05,0.1,0.15],['0','5','10','15'],size=10)
plt.xlabel('Fraction of\nalphoid reads ('+r'$10^{-2}$'+')',size=10)
plt.title('Alphoid\nCENP-B boxes',size=12)
# Recolour each bar with its per-species colour and a black outline.
for i,c in enumerate(cols[::-1]):
    bar = barlist[i]
    bar.set_linewidth(2)
    bar.set_edgecolor('black')
    bar.set_facecolor(c)
sns.despine(left=True)
plt.savefig('../figures/primate_cenpb_alphoid.abund.svg')
In [59]:
# Alpha satellite content: fraction of WGS reads per species
# Load the data before creating the figure so a missing/bad file does not
# leave an empty figure behind.
alphoid = parse_data('../data/alphoid.counts.txt')
plt.figure(figsize=(1.5,5.05))
x = np.arange(len(species))
# species (and cols below) are reversed so the first species is the top bar
y1 = np.array([alphoid[s] for s in species[::-1]])
barlist = plt.barh(x,y1,facecolor='black',height=0.4)
plt.yticks([])
# x ticks are relabelled in units of 10^-1
plt.xticks([0,0.1,0.2],['0','1','2'],size=10)
plt.xlabel('Fraction of\nWGS reads ('+r'$10^{-1}$'+')',size=10)
plt.title(r'$\alpha$'+'-satellite',size=12)
# Recolour each bar with its per-species colour and a black outline.
for i,c in enumerate(cols[::-1]):
    bar = barlist[i]
    bar.set_linewidth(2)
    bar.set_edgecolor('black')
    bar.set_facecolor(c)
sns.despine(left=True)
# Written to ../figures/ like every other figure in this notebook.
plt.savefig('../figures/primate_sat.abund.svg')
In [45]:
# CENP-B boxes (FIMO counts) among alphoid-filtered WGS reads: one bar per species
# NOTE: this rebinds cb_alphoid, which the non-FIMO alphoid cell also assigns;
# after running this cell the name refers to the FIMO data.
cb_alphoid = parse_data('../data/cenp_b_ext.counts.fimo.alphoid.txt')
plt.figure(figsize=(1.5,10))
x = np.arange(len(species))
# species (and cols below) are reversed so the first species is the top bar
y1 = np.array([cb_alphoid[s] for s in species[::-1]])
barlist = plt.barh(x,y1,facecolor='black',height=0.4)
plt.yticks([])
# Tick positions, labels (in units of 10^-2) and font size are all set by
# this single call.
plt.xticks([0,0.1,0.2,0.3],['0','10','20','30'],size=12)
plt.xlabel('Fraction of\nalphoid reads ('+r'$10^{-2}$'+')',size=12)
plt.title('Alphoid CENP-B\nboxes (FIMO)',size=14)
# Recolour each bar with its per-species colour and a black outline.
for i,c in enumerate(cols[::-1]):
    bar = barlist[i]
    bar.set_linewidth(2)
    bar.set_edgecolor('black')
    bar.set_facecolor(c)
sns.despine(left=True)
plt.savefig('../figures/primate_cenpb_alphoid.fimo.abund.svg')
In [39]:
# Array complexity: number of clusters per species
clusts = parse_data('../data/clusters.txt')
plt.figure(figsize=(1.5,6))
x = np.arange(len(species))
# species (and cols below) are reversed so the first species is the top bar
y1 = np.array([clusts[s] for s in species[::-1]])
barlist = plt.barh(x,y1,facecolor='black',height=0.4)
plt.yticks([])
# Tick positions, labels and font size are all set by this single call.
plt.xticks([2,6,10,14,18],['2','6','10','14','18'],size=12)
# The visible x range starts at 2, so the part of each bar below 2 is not shown.
plt.xlim(2,19)
plt.xlabel('Number of clusters',size=12)
plt.title('Array complexity',size=16)
# Recolour each bar with its per-species colour and a black outline.
for i,c in enumerate(cols[::-1]):
    bar = barlist[i]
    bar.set_linewidth(2)
    bar.set_edgecolor('black')
    bar.set_facecolor(c)
sns.despine(left=True)
plt.savefig('../figures/primate_clusters.svg')
In [49]:
# Mouse CENP-B box abundance in MaSat and MiSat, as fold enrichment over WGS
plt.figure(figsize=(1.25,0.8))
# Cell-specific names: rebinding the notebook-wide `species` / `cols` here
# would make the primate cells fail (KeyError) when re-run after this cell.
mouse_samples = ['wgs','masat','misat']
mouse_cols = ['#1f78b4','#e41a1c']
delta=0.1  # not referenced within this cell
width =0.4
mouse = parse_data('../data/mouse.cenp_b_ext.counts.txt')
x = np.arange(len(mouse_samples)-1)
# Reversed order is [misat, masat, wgs]; dividing the first two values by the
# last one expresses each satellite relative to WGS.
y1 = np.array([mouse[s] for s in mouse_samples[::-1]])
y1 = y1[:-1]/y1[-1]
barlist = plt.barh(x,y1,facecolor='black',height=width)
plt.xticks([0,300])
plt.xlabel('Fold enrichment\n(vs. WGS)',size=10)
plt.title('CENP-B box\nabundance',size=12)
# Bar 0 is MiSat and bar 1 is MaSat, matching the reversed sample order.
plt.yticks(x,['MiSat','MaSat'],size=12)
plt.tick_params(axis='y',length=0)
# Reversed colours: MiSat gets '#e41a1c', MaSat gets '#1f78b4'.
for i,c in enumerate(mouse_cols[::-1]):
    bar = barlist[i]
    bar.set_linewidth(2)
    bar.set_edgecolor('black')
    bar.set_facecolor(c)
sns.despine(left=True,bottom=False,offset=5)
# NOTE(review): the tick-label size is applied after despine, as in the
# original ordering; presumably offsetting the spines can reset tick-label
# properties on some matplotlib versions. Confirm before moving this call.
plt.xticks(size=10)
plt.savefig('../figures/mouse_cenpb.abund.svg')
In [50]:
# Mouse satellite abundance for MaSat and MiSat
plt.figure(figsize=(1.25,0.8))
# Cell-specific names: rebinding the notebook-wide `species` / `cols` here
# would make the primate cells fail (KeyError) when re-run after this cell.
sat_samples = ['masat','misat']
sat_cols = ['#1f78b4','#e41a1c']
delta=0.1  # not referenced within this cell
width =0.4
mouse = parse_data('../data/mouse.sat_counts.txt')
x = np.arange(len(sat_samples))
# Values are plotted as parsed; nothing is divided by a WGS baseline here.
y1 = np.array([mouse[s] for s in sat_samples])
# Reversed so that bar 0 is MiSat and bar 1 is MaSat (see the y tick labels).
barlist = plt.barh(x,y1[::-1],facecolor='black',height=width)
# NOTE(review): the tick at 0.1 is labelled '1.1' -- confirm this is intended.
plt.xticks([0,0.1],['0','1.1'],size=10)
# NOTE(review): the label says fold enrichment vs. WGS, but y1 is not
# normalised in this cell -- confirm the label matches the plotted quantity.
plt.xlabel('Fold enrichment\n(vs. WGS)',size=10)
plt.title('Satellite\nabundance',size=12)
plt.yticks(x,['MiSat','MaSat'],size=12)
plt.tick_params(axis='y',length=0)
# Reversed colours: MiSat gets '#e41a1c', MaSat gets '#1f78b4'.
for i,c in enumerate(sat_cols[::-1]):
    bar = barlist[i]
    bar.set_linewidth(2)
    bar.set_edgecolor('black')
    bar.set_facecolor(c)
sns.despine(left=True,bottom=False,offset=5)
# NOTE(review): the tick-label size is applied after despine, as in the
# original ordering; presumably offsetting the spines can reset tick-label
# properties on some matplotlib versions. Confirm before moving this call.
plt.xticks(size=10)
plt.savefig('../figures/mouse_sat.abund.svg')