Utility functions.



In [ ]:

    
import numpy as np



In [ ]:

    
def get_chisq_sensitivity(NN_case, NN_control):
    """sensitivity for the chi-square statistic based on 2x3 genotype tables"""
    NN = NN_case + NN_control  # total number of subjects
    CC_max = max(NN_case, NN_control)
    CC_min = min(NN_case, NN_control)
    sensitivity = 1. * NN**2 / (CC_min * (CC_max + 1))  # sensitivity of chisq
    return sensitivity



In [ ]:

    
def get_allelic_test_sensitivity(NN_case, NN_control):
    """sensitivity for the chi-square statistic based on 2x2 allelic tables derived from 2x3 genotype tables"""
    def sensitivity_type_1(SS, RR):
        NN = SS + RR
        return 1.0 * 8 * NN**2 * SS / \
                (RR * (2 * SS + 3) * (2 * SS + 1))
    
    def sensitivity_type_2(SS, RR):
        NN = SS + RR
        return 1.0 * 4 * NN**2 * ((2 * RR**2 - 1) * (2 * SS - 1) - 1) / \
                (SS * RR * (2 * RR + 1) * (2 * RR - 1) * (2 * SS + 1))

    return np.max([sensitivity_type_1(NN_case, NN_control),
                   sensitivity_type_1(NN_control, NN_case),
                   sensitivity_type_2(NN_case, NN_control),
                   sensitivity_type_2(NN_control, NN_case)])



In [ ]:

    
def check_table_valid(input_table):
    """Make sure that the margins (row sums and column sums ) are all positive.
    Args:
        input_table: A 2x3 numpy matrix.
    """
    ## check zero margins
    rowsum = np.array(map(np.sum, input_table))
    colsum = np.array(map(np.sum, input_table.T))
    if np.any(rowsum == 0) or np.any(colsum == 0):
        return False
    else:
        return True



In [ ]:

    
def chisq_stat(input_table):
    """Calculate the Pearson's chi-square staitsitc.
    Args:
        input_table: A 2x3 numpy matrix.

    Returns:
        A tuple (chisquare_statistics, degree_of_freedom).
    """
    input_table = input_table.astype(float)
    rowsum = np.array(map(np.sum, input_table))
    colsum = np.array(map(np.sum, input_table.T))
    expected = np.outer(rowsum, colsum) / np.sum(rowsum)
    # df = (len([1 for rr in rowsum if rr > 0]) - 1) * \
    #     (len([1 for cc in colsum if cc > 0]) - 1)
    chisq = np.sum(np.array(input_table[expected > 0] -
                            expected[expected > 0]) ** 2 /
                   expected[expected > 0])
    # return (chisq, df)
    return chisq



In [ ]:

    
def chisq_gradient(input_table):
    """Return the changable part of the gradient of the chi-square staitsitc.

    Args:
        input_table: A 2x3 numpy matrix.

    Returns:
        A four-element tuple consisting of the partial derivatives based on the
        parametrization the chi-square statistic by (r0, r1, n0, n1). The
        full parametrization would be
        (r0, r1, r2, s0, s1, s2, n0, n1, n2), where ri + si = ni. The returned 
        value will be scaled down by N^2 / (R * S).
    """
    input_table = input_table.astype(float)
    colsum = np.array(map(np.sum, input_table.T))
    ## divide each cell by colsum
    fraction_table = input_table / colsum
    dy_dr0, dy_dr1 = [2 * fraction_table[0, ii] - 2 * fraction_table[0, 2] for
                      ii in [0, 1]]
    dy_dn0, dy_dn1 = [-fraction_table[0, ii] ** 2 + fraction_table[0, 2] ** 2 for
                      ii in [0, 1]]
    return (dy_dr0, dy_dr1, dy_dn0, dy_dn1)