In [1]:
from bokeh.plotting import figure, show, output_notebook
from math import log10
from scipy.stats.stats import pearsonr, spearmanr
import pandas as pd
from time import time

from IPython.html.widgets import interactive, Select, Dropdown, HBox, VBox
from IPython.display import display


:0: FutureWarning: IPython widgets are experimental and may change in the future.

In [2]:
# Load BokehJS into the notebook so figures passed to show() render inline.
output_notebook()


BokehJS successfully loaded.

In [3]:
def load_data(fp, index_col, sep="\t"):
    """Read a delimited text table and index it by ``index_col``.

    Parameters
    ----------
    fp : path or file-like object
        Handed straight to ``pandas.read_csv``.
    index_col : column label
        Column that is moved out of the columns and used as the index.
    sep : str, optional
        Field delimiter; a tab by default.

    Returns
    -------
    pandas.DataFrame
        The parsed table with ``index_col`` as its index. Empty fields are
        read as missing values.
    """
    table = pd.read_csv(fp, sep=sep, na_values='')
    return table.set_index(index_col)
    
def build_figure(data, xcol, ycol, xscale='linear', yscale='linear'):
    """Build a Bokeh scatter plot of ``data[ycol]`` against ``data[xcol]``.

    Parameters
    ----------
    data : table indexable by column label
        In this notebook, the DataFrame returned by ``load_data``.
    xcol, ycol : column labels
        Columns plotted on the x and y axes; also used as the axis labels.
    xscale, yscale : str, optional
        Passed to ``figure`` as ``x_axis_type`` / ``y_axis_type``. The
        notebook's controls offer 'linear' and 'log'.

    Returns
    -------
    The configured figure. It is not shown here; the caller does that.
    """
    # No ``tools`` argument is passed, so the figure uses Bokeh's default
    # toolbar. An unused ``tools`` list that was never handed to ``figure``
    # has been dropped from this function.
    p = figure(x_axis_type=xscale, y_axis_type=yscale)
    p.xaxis.axis_label = xcol
    p.yaxis.axis_label = ycol
    # Small, mostly transparent markers.
    p.circle(data[xcol], data[ycol], fill_alpha=0.2, size=5)
    return p

def calculate_coeff(data, xcol, ycol, xscale='linear', yscale='linear'):
    """Compute Pearson and Spearman correlations between two columns.

    Parameters
    ----------
    data : table indexable by column label
    xcol, ycol : column labels
        The two columns to correlate.
    xscale, yscale : str, optional
        When a scale is exactly 'log', that column is replaced by the base-10
        logarithm of each value before correlating; any other value leaves the
        column untouched.

    Returns
    -------
    tuple
        ``(pearsonr(x, y), spearmanr(x, y))`` as returned by scipy.

    Raises
    ------
    ValueError
        From ``log10`` when a log-scaled column holds a zero or negative value.
    """
    def _on_scale(column, scale):
        # Return the column as-is, or as a list of log10 values for 'log'.
        values = data[column]
        if scale != 'log':
            return values
        return list(map(log10, values))

    xdata = _on_scale(xcol, xscale)
    ydata = _on_scale(ycol, yscale)
    return pearsonr(xdata, ydata), spearmanr(xdata, ydata)

def update(x_axis, y_axis, x_scale, y_scale):
    """Print the correlations for the chosen columns and show their scatter plot.

    Parameters
    ----------
    x_axis, y_axis : column labels
        Columns of the module-level ``data`` table to compare.
    x_scale, y_scale : str
        Axis scales, forwarded unchanged to ``calculate_coeff`` and
        ``build_figure``.

    Prints the Pearson and Spearman results, then displays the figure with
    ``show``. Returns nothing.
    """
    pearson, spearman = calculate_coeff(data, x_axis, y_axis, x_scale, y_scale)
    # One pre-formatted argument per print: the text is the same whether
    # ``print`` is the Python 2 statement or the Python 3 function. The
    # one-element tuple keeps ``%`` from unpacking the result tuple.
    print("Pearson Correlation:  %s" % (pearson,))
    print("Spearman Correlation:  %s" % (spearman,))
    fig = build_figure(data, x_axis, y_axis, x_scale, y_scale)
    show(fig)
    
def widgets():
    """Build and display the controls that drive ``update``.

    Creates a column selector and a linear/log dropdown for each axis, using
    the columns of the module-level ``data`` table, and binds them to
    ``update`` through ``interactive``. The controls are displayed as two
    rows: children 0 and 2 on the first, children 1 and 3 on the second
    (presumably the x controls and the y controls respectively, given the
    parameter order of ``update`` -- confirm against the widget library).
    """
    column_names = tuple(data.columns)

    x_column = Select(options=column_names)
    y_column = Select(options=column_names)
    x_scaling = Dropdown(options=['linear', 'log'])
    y_scaling = Dropdown(options=['linear', 'log'])

    panel = interactive(
        update,
        x_axis=x_column,
        x_scale=x_scaling,
        y_axis=y_column,
        y_scale=y_scaling,
    )

    controls = panel.children
    first_row = HBox([controls[0], controls[2]])
    second_row = HBox([controls[1], controls[3]])
    display(VBox([first_row, second_row]))

In [4]:
# NOTE(review): absolute, machine-specific path -- this cell only runs where
# this exact file exists; consider a configurable data directory.
fp = "/Users/luke/krse2011/db/krse2011_v5_humann_KOrelAbund_read1_wNames.tsv"
# Module-level table read by update() and widgets(); indexed by 'Sample'.
data = load_data(fp, 'Sample')

In [5]:
# Display the interactive controls; update() prints the correlations and
# shows the plot for the current selection.
widgets()


Pearson Correlation:  (-0.79929825605387339, 4.6157116368824256e-11)
Spearman Correlation:  (-0.69210789229684422, 1.3942081547062504e-07)

In [ ]:


In [ ]: