Análise de RS para matrizes aleatórias


In [ ]:
import randomCorr as rc
%load  ../../bayesian-cov-estimation/utils/matrix_functions.py

In [ ]:
import numpy as np
import pandas as pd

def cov2cor(matrix):
    y = np.sqrt(np.diagonal(matrix))
    y = (y[:, np.newaxis] * y)
    corrmatrix = matrix / y
    return corrmatrix

def calc_r2(matrix, isVar=False):
    tr = matrix.shape[1]
    cmatrix = cov2cor(matrix) if isVar else matrix
    r2_total = np.mean(cmatrix[np.triu_indices(tr,1)] * cmatrix[np.triu_indices(tr,1)])
    return r2_total

def icv_calc(matrix):
    eVal, eVec = np.linalg.eigh(matrix)
    icv = np.sqrt(np.var(eVal)) / np.mean(eVal)
    return icv

def cos_angle(vector1, vector2):
    angle = np.dot(vector1, vector2) / (np.linalg.norm(vector1) *
                                        np.linalg.norm(vector2))
    return angle

def random_skewers(matrix1, matrix2, num_vectors=1000):
    traits = matrix1.shape[0]
    rand_vec = np.random.multivariate_normal(np.zeros(traits),
                                             np.identity(traits, float),
                                             num_vectors).T
    delta_z1 = np.dot(matrix1, rand_vec)
    delta_z2 = np.dot(matrix2, rand_vec)

    ndelta_z1 = delta_z1/np.sqrt((delta_z1*delta_z1).sum(0))
    ndelta_z2 = delta_z2/np.sqrt((delta_z2*delta_z2).sum(0))

    return np.mean(np.diag(np.dot(ndelta_z1.T, ndelta_z2)))

def read_matrices(matrices, labels, num_traits):
    raw_matrices = pd.read_csv(matrices)
    with open(labels) as f:
        monkey_labels = f.read().splitlines()

    def make_symetric(matrix):
        matrix = np.tril(matrix) + np.tril(matrix, k=-1).transpose()
        return matrix

    node_matrices = {monkey_labels[0]: np.array(raw_matrices.ix[0:num_traits-1, ])}
    for i in range(1, len(monkey_labels)):
        new_matrix = np.array(raw_matrices.ix[i*num_traits:(((i+1)*num_traits)-1), :])
        node_matrices[monkey_labels[i]] = make_symetric(new_matrix)

    return node_matrices

def matrix_correlation(matrix1, matrix2):
    tr = matrix1.shape[0]
    correlation = np.corrcoef(matrix1[np.triu_indices(tr,1)], matrix2[np.triu_indices(tr,1)])[1, 0]
    return correlation

In [6]:
NUM_MATRICES = 2000

ms = []

for i in xrange(NUM_MATRICES):
    m = rc.rand_corr(35, 10**-3)
    ms.append(m)


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-6-6ddbaddb9e5a> in <module>()
      4 
      5 for i in xrange(NUM_MATRICES):
----> 6     m = rc.rand_corr(35, 10**-3)
      7     ms.append(m)

/home/walrus/corr-param/randomCorr.pyc in rand_corr(n, ke)
     14     for i in xrange(2, n):
     15         for j in xrange(1, i):
---> 16             b1 = np.dot(b[j, 0:j], b[i, 0:j].T)
     17             b2 = np.dot(b[j, j], b[i, j])
     18             z = b1 + b2

KeyboardInterrupt: 

In [ ]:
rs0 = []

for i in xrange(NUM_MATRICES):
    rs0.append(random_skewers(ms[0], ms[i]))

In [ ]:
mc0 = []

for i in xrange(NUM_MATRICES):
    mc0.append(matrix_correlation(ms[0], ms[i]))

In [28]:
hist(mc0[1:])


Out[28]:
(array([ 84, 222, 310, 185, 117,  52,  19,   8,   1,   1]),
 array([-0.08093416, -0.05337393, -0.0258137 ,  0.00174653,  0.02930676,
        0.056867  ,  0.08442723,  0.11198746,  0.13954769,  0.16710792,
        0.19466816]),
 <a list of 10 Patch objects>)

In [29]:
hist(rs0[1:])


Out[29]:
(array([ 28, 113, 222, 251, 192, 106,  59,  18,   8,   2]),
 array([ 0.05346387,  0.07255749,  0.09165112,  0.11074474,  0.12983837,
        0.148932  ,  0.16802562,  0.18711925,  0.20621287,  0.2253065 ,
        0.24440012]),
 <a list of 10 Patch objects>)

In [24]:
scatter(mc0[1:], rs0[1:])


Out[24]:
<matplotlib.collections.PathCollection at 0x6127290>

In [25]:
average(mc0)


Out[25]:
-0.0019528180855193302

In [26]:
average(rs0)


Out[26]:
0.12447475666827049

In [ ]: