In [1]:
from bokeh.plotting import figure, show, output_notebook
from math import log10
from scipy.stats.stats import pearsonr, spearmanr
import pandas as pd
from time import time
from IPython.html.widgets import interactive, Select, Dropdown, HBox, VBox
from IPython.display import display
In [2]:
# Configure Bokeh to render figures inline in this notebook (needed before any show() call below).
output_notebook()
In [3]:
def load_data(fp, index_col, sep="\t"):
    """Read a delimited text table into a DataFrame indexed by one of its columns.

    Parameters
    ----------
    fp : path or file-like object handed straight to ``pandas.read_csv``.
    index_col : name of the column that becomes the index of the result.
    sep : field delimiter; tab by default.

    Returns
    -------
    pandas.DataFrame whose index is ``index_col``. Empty fields are read as
    missing values (``na_values=''``).
    """
    table = pd.read_csv(fp, sep=sep, na_values='')
    # Non-inplace set_index yields the same re-indexed frame as mutating `table`.
    return table.set_index(index_col)
def build_figure(data, xcol, ycol, xscale='linear', yscale='linear'):
    """Build a Bokeh scatter plot of one column of ``data`` against another.

    Parameters
    ----------
    data : table supporting ``data[column_name]`` lookup (the notebook passes
        a pandas DataFrame).
    xcol, ycol : column names plotted on the x and y axes; also used as the
        axis labels.
    xscale, yscale : axis type strings forwarded to ``figure`` as
        ``x_axis_type`` / ``y_axis_type`` (the notebook uses 'linear' or 'log').

    Returns
    -------
    The Bokeh figure; it is not shown here, the caller decides when to show it.
    """
    # The original assigned an unused `tools` list that was never passed to
    # figure(); it has been dropped, so the figure keeps Bokeh's default toolbar
    # exactly as before.
    p = figure(x_axis_type=xscale, y_axis_type=yscale)
    p.xaxis.axis_label = xcol
    p.yaxis.axis_label = ycol
    # Small, mostly transparent markers keep dense point clouds readable.
    p.circle(data[xcol], data[ycol], fill_alpha=0.2, size=5)
    return p
def calculate_coeff(data, xcol, ycol, xscale='linear', yscale='linear'):
    """Compute Pearson and Spearman correlations between two columns.

    A column whose scale is 'log' is converted with ``log10`` before the
    coefficients are computed; any other scale value leaves it untouched.

    Parameters
    ----------
    data : table supporting ``data[column_name]`` lookup.
    xcol, ycol : names of the two columns to correlate.
    xscale, yscale : 'log' to correlate the base-10 logarithm of that column.

    Returns
    -------
    Tuple ``(pearsonr_result, spearmanr_result)``, each exactly as returned by
    the corresponding scipy function.

    Raises
    ------
    ValueError
        From ``math.log10`` when a log-scaled column holds a zero or negative
        value.
    """
    def _on_scale(column, scale):
        # Return the column as-is, or as a list of its base-10 logarithms.
        values = data[column]
        if scale != 'log':
            return values
        return [log10(value) for value in values]

    # x is resolved before y, matching the original evaluation order.
    x_values = _on_scale(xcol, xscale)
    y_values = _on_scale(ycol, yscale)
    return pearsonr(x_values, y_values), spearmanr(x_values, y_values)
def update(x_axis, y_axis, x_scale, y_scale):
    """Widget callback: print both correlation coefficients and redraw the plot.

    Reads the module-level ``data`` table, so ``data`` must be defined before
    this function is called.

    Parameters
    ----------
    x_axis, y_axis : column names of ``data`` to correlate and plot.
    x_scale, y_scale : scale strings passed on to ``calculate_coeff`` and
        ``build_figure`` (the notebook offers 'linear' and 'log').
    """
    p, s = calculate_coeff(data, x_axis, y_axis, x_scale, y_scale)
    # A single pre-formatted argument prints the same text whether `print` is
    # the Python 2 statement or the Python 3 function. The two spaces after the
    # colon reproduce the output of the original `print "...: ", value` form.
    print("Pearson Correlation:  {0}".format(p))
    print("Spearman Correlation:  {0}".format(s))
    # The original also stored time() in an unused local; that dead assignment
    # has been removed.
    fig = build_figure(data, x_axis, y_axis, x_scale, y_scale)
    show(fig)
def widgets():
    """Create and display the axis/scale selectors that drive ``update``.

    Reads the module-level ``data`` table for the list of selectable columns.
    The controls are laid out as two rows: the first two selectors on top,
    the two scale dropdowns' partners arranged by ``interactive`` child index
    (children 0 and 2 on the first row, children 1 and 3 on the second).
    """
    column_names = tuple(data.columns)

    # Widgets are constructed in the same order as before: x axis, y axis,
    # x scale, y scale.
    x_axis_select = Select(options=column_names)
    y_axis_select = Select(options=column_names)
    x_scale_dropdown = Dropdown(options=['linear', 'log'])
    y_scale_dropdown = Dropdown(options=['linear', 'log'])

    controls = interactive(
        update,
        x_axis=x_axis_select,
        x_scale=x_scale_dropdown,
        y_axis=y_axis_select,
        y_scale=y_scale_dropdown
    )

    kids = controls.children
    top_row = HBox([kids[0], kids[2]])
    bottom_row = HBox([kids[1], kids[3]])
    display(VBox([top_row, bottom_row]))
In [4]:
# Load the tab-separated input table and index it by its 'Sample' column.
# `data` is read as a global by update() and widgets(), so this cell must run before them.
# NOTE(review): `fp` is an absolute, machine-specific path; the notebook will not
# run on another machine without editing it.
fp = "/Users/luke/krse2011/db/krse2011_v5_humann_KOrelAbund_read1_wNames.tsv"
data = load_data(fp, 'Sample')
In [5]:
# Build and display the column/scale selectors; they are wired to update() via interactive().
widgets()
In [ ]:
In [ ]: