In [1]:
from bokeh.plotting import figure, show, output_notebook
from math import log10
from scipy.stats.stats import pearsonr, spearmanr
import pandas as pd

In [2]:
# Load BokehJS into the notebook so that later show() calls render inline.
output_notebook()


BokehJS successfully loaded.

In [3]:
def load_data(fp, index_col, sep="\t"):
    """Read a delimited text table and index it by one of its columns.

    Parameters
    ----------
    fp : path or file-like object
        Passed straight to ``pandas.read_csv``.
    index_col : str
        Name of the column that becomes the row index.
    sep : str, optional
        Field delimiter; tab by default.

    Returns
    -------
    pandas.DataFrame
        The parsed table with ``index_col`` as its index. Empty fields are
        read as missing values.
    """
    table = pd.read_csv(fp, sep=sep, na_values='')
    return table.set_index(index_col)
    
def build_figure(data, xcol, ycol, xscale, yscale):
    """Build a Bokeh scatter plot of one column of ``data`` against another.

    Parameters
    ----------
    data : mapping of column name -> values (e.g. a pandas DataFrame)
        Source of the plotted values; indexed as ``data[xcol]`` / ``data[ycol]``.
    xcol, ycol : str
        Column names for the x and y axes; also used as the axis labels.
    xscale, yscale : str
        Axis types forwarded to ``figure`` as ``x_axis_type`` / ``y_axis_type``
        (the notebook passes "linear" or "log").

    Returns
    -------
    The configured Bokeh figure; the caller is responsible for showing it.
    """
    # The figure is created with Bokeh's default toolbar. An earlier local
    # ``tools`` list was assigned here but never handed to ``figure``, so it
    # had no effect and has been removed.
    p = figure(x_axis_type=xscale, y_axis_type=yscale)
    p.xaxis.axis_label = xcol
    p.yaxis.axis_label = ycol
    # Semi-transparent markers keep overlapping samples distinguishable.
    p.circle(data[xcol], data[ycol], fill_alpha=0.2, size=5)
    return p

def calculate_coeff(data, xcol, ycol, xscale='linear', yscale='linear'):
    """Compute Pearson and Spearman correlations between two columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the two columns.
    xcol, ycol : str
        Names of the columns to correlate (they may be the same column).
    xscale, yscale : str, optional
        ``'log'`` correlates the base-10 logarithm of that column; any other
        value (default ``'linear'``) uses the raw values.

    Returns
    -------
    tuple
        ``(pearsonr(x, y), spearmanr(x, y))`` exactly as returned by SciPy.

    Notes
    -----
    Only complete observations are used: a row is dropped when either value
    is missing, and -- for a log-scaled column -- when its value is not
    strictly positive, since the logarithm is undefined there. Without this
    filtering a single zero raised ``ValueError`` from ``log10`` and a single
    missing value turned both coefficients into NaN. If too few rows remain,
    SciPy decides how that is reported.
    """
    xs = data[xcol]
    ys = data[ycol]

    # Pairwise-complete rows; both masks share the frame's index.
    keep = xs.notnull() & ys.notnull()
    if xscale == 'log':
        keep = keep & (xs > 0)
    if yscale == 'log':
        keep = keep & (ys > 0)

    xs = xs[keep]
    ys = ys[keep]

    if xscale == 'log':
        xs = xs.map(log10)
    if yscale == 'log':
        ys = ys.map(log10)

    # Hand SciPy plain arrays so index labels play no part in the pairing.
    xvals = xs.values
    yvals = ys.values
    return pearsonr(xvals, yvals), spearmanr(xvals, yvals)

def update(x_axis, y_axis, x_scale, y_scale):
    """Widget callback: report correlations and redraw the scatter plot.

    Reads the module-level ``data`` frame, prints the Pearson and Spearman
    results for the chosen columns and scales, then shows a fresh figure.

    Parameters
    ----------
    x_axis, y_axis : str
        Column names of ``data`` to compare.
    x_scale, y_scale : str
        Scale passed on to ``calculate_coeff`` and ``build_figure``
        (the notebook offers "linear" and "log").
    """
    p, s = calculate_coeff(data, x_axis, y_axis, x_scale, y_scale)
    # A single pre-formatted argument prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("Pearson Correlation:  %s" % (p,))
    print("Spearman Correlation:  %s" % (s,))
    show(build_figure(data, x_axis, y_axis, x_scale, y_scale))

In [4]:
# Tab-separated table to explore (load_data's default separator is "\t").
# NOTE(review): absolute path on one machine -- adjust before running elsewhere.
fp = "/Users/luke/krse2011/db/krse2011_v5_humann_KOrelAbund_read1_wNames.tsv"
# Rows are indexed by the 'Sample' column; `data` is the module-level frame
# that update() reads.
data = load_data(fp, 'Sample')

In [5]:
from IPython.html.widgets import interact, DropdownWidget, Checkbox
# Wire `update` to interactive controls: each keyword supplies the choices
# offered for the matching `update` argument. The call must start at column 0;
# a stray leading space here is an "unexpected indent" error.
interact(
    update,
    x_axis=tuple(data.columns),
    y_axis=tuple(data.columns),
    x_scale=["linear", "log"],
    y_scale=["linear", "log"],
)


Pearson Correlation:  (0.23221546420800382, 0.12478839792308431)
Spearman Correlation:  (0.20633944787500275, 0.17386075052234226)

In [ ]:


In [ ]: