In [ ]:
import randomCorr as rc
%load ../../bayesian-cov-estimation/utils/matrix_functions.py
In [ ]:
import numpy as np
import pandas as pd
def cov2cor(matrix):
y = np.sqrt(np.diagonal(matrix))
y = (y[:, np.newaxis] * y)
corrmatrix = matrix / y
return corrmatrix
def calc_r2(matrix, isVar=False):
tr = matrix.shape[1]
cmatrix = cov2cor(matrix) if isVar else matrix
r2_total = np.mean(cmatrix[np.triu_indices(tr,1)] * cmatrix[np.triu_indices(tr,1)])
return r2_total
def icv_calc(matrix):
eVal, eVec = np.linalg.eigh(matrix)
icv = np.sqrt(np.var(eVal)) / np.mean(eVal)
return icv
def cos_angle(vector1, vector2):
angle = np.dot(vector1, vector2) / (np.linalg.norm(vector1) *
np.linalg.norm(vector2))
return angle
def random_skewers(matrix1, matrix2, num_vectors=1000):
traits = matrix1.shape[0]
rand_vec = np.random.multivariate_normal(np.zeros(traits),
np.identity(traits, float),
num_vectors).T
delta_z1 = np.dot(matrix1, rand_vec)
delta_z2 = np.dot(matrix2, rand_vec)
ndelta_z1 = delta_z1/np.sqrt((delta_z1*delta_z1).sum(0))
ndelta_z2 = delta_z2/np.sqrt((delta_z2*delta_z2).sum(0))
return np.mean(np.diag(np.dot(ndelta_z1.T, ndelta_z2)))
def read_matrices(matrices, labels, num_traits):
raw_matrices = pd.read_csv(matrices)
with open(labels) as f:
monkey_labels = f.read().splitlines()
def make_symetric(matrix):
matrix = np.tril(matrix) + np.tril(matrix, k=-1).transpose()
return matrix
node_matrices = {monkey_labels[0]: np.array(raw_matrices.ix[0:num_traits-1, ])}
for i in range(1, len(monkey_labels)):
new_matrix = np.array(raw_matrices.ix[i*num_traits:(((i+1)*num_traits)-1), :])
node_matrices[monkey_labels[i]] = make_symetric(new_matrix)
return node_matrices
def matrix_correlation(matrix1, matrix2):
tr = matrix1.shape[0]
correlation = np.corrcoef(matrix1[np.triu_indices(tr,1)], matrix2[np.triu_indices(tr,1)])[1, 0]
return correlation
In [6]:
NUM_MATRICES = 2000
ms = []
for i in xrange(NUM_MATRICES):
m = rc.rand_corr(35, 10**-3)
ms.append(m)
In [ ]:
rs0 = []
for i in xrange(NUM_MATRICES):
rs0.append(random_skewers(ms[0], ms[i]))
In [ ]:
mc0 = []
for i in xrange(NUM_MATRICES):
mc0.append(matrix_correlation(ms[0], ms[i]))
In [28]:
hist(mc0[1:])
Out[28]:
In [29]:
hist(rs0[1:])
Out[29]:
In [24]:
scatter(mc0[1:], rs0[1:])
Out[24]:
In [25]:
average(mc0)
Out[25]:
In [26]:
average(rs0)
Out[26]:
In [ ]: