In [ ]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.stats import t as tstat

In [ ]:
def ste(_x):
    # standard error of the mean: sample std (ddof=1) divided by sqrt(n)
    return stats.sem(_x, axis=None, ddof=1)

def regression(x, y, visualize=True, print_stats=True, p=0.95, two_tailed=True, extent=None):
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    
    if print_stats:
        print('p={0}, r={1}, y={2}*x+{3}, slope_stderr={4}'.format(p_value, r_value, slope, intercept, std_err))
    
    mean_x = np.mean(x)
    n = len(x)
    p_tailed = p
    if two_tailed:
        p_tailed = p + ((1. - p) / 2.)
    # a simple linear regression estimates two parameters, so use n - 2 degrees of freedom
    t = tstat.ppf(p_tailed, n - 2)

    z = np.polyfit(x, y, 1)
    p_x = np.arange(np.min(x), np.max(x) + 1, 1)

    # residual sum of squares of the fitted line
    s_err = np.sum(np.power(y - (z[0] * x + z[1]), 2))

    # confidence band: t * sqrt(MSE * (1/n + (x0 - mean_x)^2 / Sxx))
    confs = t * np.sqrt((s_err / (n - 2)) * (1.0 / n + (np.power(p_x - mean_x, 2) / (np.sum(np.power(x, 2)) - n * np.power(mean_x, 2)))))

    # predicted values along p_x: slope * x + intercept
    p_y = z[0] * p_x + z[1]

    lower = p_y - abs(confs)
    upper = p_y + abs(confs)
    
    c_x = x
    c_y = [slope * i + intercept for i in x]

    if visualize:
        plt.xlabel('X values')
        plt.ylabel('Y values')
        plt.title('Linear regression and confidence limits')

        plt.plot(x,y,'bo',label='Sample observations')
        plt.plot(c_x,c_y,'r-',label='Regression line')

        tail = 'one-tailed'
        if two_tailed:
            tail = 'two-tailed'

        plt.plot(p_x, lower, 'b--', label='Lower confidence limit ({0:.0%}, {1})'.format(p, tail))
        plt.plot(p_x, upper, 'b--', label='Upper confidence limit ({0:.0%}, {1})'.format(p, tail))

        if extent is None:
            extent = [[min(x), max(x)], [min(y), max(y)]]
        plt.xlim(*extent[0])
        plt.ylim(*extent[1])

        plt.legend(loc=0)
        leg = plt.gca().get_legend()
        ltext = leg.get_texts()
        plt.setp(ltext, fontsize=10)

        plt.show()

In [ ]:
# example data
x = np.array([4.0,2.5,3.2,5.8,7.4,4.4,8.3,8.5])
y = np.array([2.1,4.0,1.5,6.3,5.0,5.8,8.1,7.1])
regression(x, y, extent=[[0, 10], [0, 10]])
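
In [ ]:
# Usage sketch: the same data with a one-tailed 99% confidence band; the plot
# extent is derived from the data when `extent` is not given.
regression(x, y, p=0.99, two_tailed=False)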

In [ ]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats

In [ ]:
def t_test(a, b, visualize=True, print_stats=True):
    # Welch's t-test (does not assume equal variances)
    t, p = scipy.stats.ttest_ind(a, b, equal_var=False)

    if print_stats:
        print('t={0}, p={1}'.format(t, p))

    mean_a = np.mean(a)
    mean_b = np.mean(b)
    ste_a = ste(a)
    ste_b = ste(b)

    if visualize:
        # bar plot of the group means with standard errors as error bars
        fig, ax = plt.subplots()
        ax.bar(np.arange(2), (mean_a, mean_b), 0.8, color='r', yerr=(ste_a, ste_b))
        ax.set_xticks(np.arange(2))
        ax.set_xticklabels(['a', 'b'])

        plt.show()

In [ ]:
t_test(x, y)
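
In [ ]:
# Usage sketch on synthetic data (the arrays a2/b2 are illustrative, not part
# of the original notebook): two normal samples with different means should
# yield a small p-value from the Welch test.
rng = np.random.RandomState(0)
a2 = rng.normal(loc=0.0, scale=1.0, size=50)
b2 = rng.normal(loc=1.0, scale=1.5, size=50)
t_test(a2, b2)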

In [ ]:
import statsmodels.sandbox.stats.multicomp as multi

In [ ]:
def multiple_compare_columns(a, b, a_cols, b_cols, p=0.95, method='simes-hochberg'):
    '''
    Correct a set of p-values for multiple comparisons.

    Available correction methods, from
    http://www.statsmodels.org/devel/generated/statsmodels.sandbox.stats.multicomp.multipletests.html#statsmodels.sandbox.stats.multicomp.multipletests
    `bonferroni` : one-step correction
    `sidak` : one-step correction
    `holm-sidak` : step-down method using Sidak adjustments
    `holm` : step-down method using Bonferroni adjustments
    `simes-hochberg` : step-up method (independent)
    `hommel` : closed method based on Simes tests (non-negative)
    `fdr_bh` : Benjamini/Hochberg (non-negative)
    `fdr_by` : Benjamini/Yekutieli (negative)
    `fdr_tsbh` : two stage fdr correction (non-negative)
    `fdr_tsbky` : two stage fdr correction (non-negative)
    '''

    # placeholder: the column arguments (a, b, a_cols, b_cols) are not used yet;
    # the list below is a hard-coded example of raw p-values
    reject, pvals, alphacSidak, alphacBonf = multi.multipletests([0.1, 0.12, 0.13, 0.04], alpha=1. - p, method=method)

    return reject, pvals, alphacSidak, alphacBonf

In [ ]:
# the arguments are unused placeholders; the function currently corrects a
# hard-coded example list of p-values
multiple_compare_columns(0, 0, 0, 0)
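
In [ ]:
# Sketch of how the column arguments could feed multipletests (the arrays and
# column indices here are illustrative assumptions, not part of the original
# notebook): run one Welch t-test per column pair, then correct the p-values
# with the Benjamini/Hochberg procedure.
rng = np.random.RandomState(1)
a_data = rng.normal(0.0, 1.0, size=(30, 3))
b_data = rng.normal(0.5, 1.0, size=(30, 3))
raw_pvals = [scipy.stats.ttest_ind(a_data[:, i], b_data[:, j], equal_var=False)[1]
             for i, j in zip([0, 1, 2], [0, 1, 2])]
reject, pvals, alphacSidak, alphacBonf = multi.multipletests(raw_pvals, alpha=0.05, method='fdr_bh')
print(reject, pvals)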
