In [2]:
%matplotlib inline
In [3]:
# Some initial imports
import pandas as pd
import numpy as np
import os
from ggplot import *
In [4]:
# Sample titles with corresponding barcodes
samples = {
's9': ['ATCACG', 'ACAGTG'],
's9+bcm': ['CGATGT', 'GCCAAT'],
's17': ['TTAGGC', 'GATCAG'],
's17+bcm': ['TGACCA', 'TAGCTT'],
's19': ['CAGATC','GGCTAC'],
's19+bcm': ['ACTTGA', 'CTTGTA']
}
# Barcodes
barcodes = ['ATCACG', 'ACAGTG', 'CGATGT', 'GCCAAT', 'TTAGGC', 'GATCAG', 'TGACCA', 'TAGCTT', 'CAGATC','GGCTAC', 'ACTTGA', 'CTTGTA']
In [4]:
# Download data files
# Only run this cell once, then just comment the code out
import urllib
DOMAIN = 'http://ngs.nudlerlab.info'
file_tpl = '{0}.UTR_heatmap.txt.gz'
for barcode in barcodes:
file_name = file_tpl.format(barcode)
try:
print("Trying {0}".format(file_name))
urllib.request.urlretrieve("{0}/{1}".format(DOMAIN, file_name), "../data/{0}".format(file_name))
except IOError:
print("\terror downloading {0}. Skipping.".format(file_name))
In [7]:
# Load the data
data_tpl = '../data/{0}.UTR_heatmap.txt.gz'
data = {}
for barcode in barcodes:
try:
data[barcode] = np.loadtxt(data_tpl.format(barcode), delimiter='\t')
except IOError:
pass
In [8]:
data
Out[8]:
In [10]:
# import more stuff we'll need for plotting
import matplotlib.pyplot as plt
from matplotlib import cm
import math
In [15]:
fig, ax = plt.subplots(figsize=(18,18))
d = data['ACAGTG'].copy()
d.sort(axis=0)
im = ax.imshow(d, cmap=cm.OrRd)
plt.show()
In [10]:
# Our main plotting function
# Takes an np array as data source and does the plotting using vanilla matplotlib
# Remember that image is nothing more than an array of floats (intensities)
def plot_utr_heatmap(data, title=None, cmap=cm.OrRd, ylim=[1750, 1500], orientation='vertical'):
from mpl_toolkits.axes_grid1 import make_axes_locatable
utr = data.copy()
utr.sort(axis=0)
max_val = np.max(utr)
midpoint = math.ceil(max_val / 2)
fig, ax = plt.subplots(figsize=(18,18))
im = ax.imshow(utr,cmap=cmap)
if title:
ax.set_title(title, fontsize=20)
else:
ax.set_title("Read depth at 5'UTR", fontsize=20)
ax.set_ylim(ylim)
ax.set_xlabel('Position from TSS', fontsize=20)
ax.get_yaxis().set_ticks([])
for tick in ax.xaxis.get_major_ticks():
tick.label.set_fontsize(20)
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.05)
cbar = fig.colorbar(im, cax=cax, ticks=[0,midpoint,max_val], orientation=orientation)
cbar.ax.invert_yaxis()
cbar.ax.set_xticklabels(['0', '{0}'.format(int(midpoint)), '{0}'.format(int(max_val))], fontsize=20)
plt.show()
In [11]:
plot_utr_heatmap(data['ACAGTG'])
In [12]:
for barcode,heatmap in data.items():
plot_utr_heatmap(heatmap, title=barcode)
In [13]:
# Ok, we do see some changes but let's try to quantify them
win = 20 # window size for computing rolling mean
quart = 0.5 # quartile to sum
df = pd.DataFrame(np.sort((data['ACAGTG'] + data['ATCACG']) / 2., axis=0))
mask = (df > np.max(df.max()) * quart)
col1 = pd.rolling_mean(df[mask].cumsum()[-1:].transpose(), win)
df = pd.DataFrame(np.sort((data['CGATGT'] + data['GCCAAT']) / 2., axis=0))
mask = (df > np.max(df.max()) * quart)
col2 = pd.rolling_mean(df[mask].cumsum()[-1:].transpose(), win)
In [14]:
df1 = pd.DataFrame()
df1['sum'] = col1[1747]
df1['BCM'] = '-'
df1['pos'] = df1.index
df2 = pd.DataFrame()
df2['sum'] = col2[1747]
df2['BCM'] = '+'
df2['pos'] = df2.index
df = pd.concat([df1,df2])
df = df.dropna().reset_index(drop=True)
df['sum'] = df['sum'] / 1e5
In [15]:
p = ggplot(df, aes(x='pos', y='sum', color='BCM')) \
+ geom_line(size=3, alpha=0.7) \
+ scale_x_continuous(name="position from TSS") \
+ scale_y_continuous(name="Mean sum of 50% quartile read depth, x10^-5") \
+ theme(axis_title=element_text(size=20),
axis_text=element_text(size=18))
print(p)
In [ ]: