In [2]:
import numpy as np
import random
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize'] = (10.0, 8.0)
from sklearn.datasets import make_biclusters
from sklearn.datasets import samples_generator as sg
# from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.metrics import consensus_score
from biclustering import DeltaBiclustering, MSR
In [26]:
%pylab inline
In [5]:
from IPython.core.display import Image
Image(filename='synteticBiclusters.png')
Out[5]:
In [71]:
def generate_dataset(option, noise=1, noise_background=True, shuffle=False):
    shape = (150, 150)
    n, m = shape
    # bicluster mean values; kept reasonably close so the clusters are not trivially separable
    centers = [20, 40, 60, 80, 100]
    y_row = np.zeros(n)
    y_col = np.zeros(m)
    if noise_background:
        data = np.random.rand(n, m) * 100
    else:
        data = np.zeros(shape)
    if option == 'a':
        data[60:110, 70:140] = np.random.rand(50, 70) * noise + centers[0]
        y_row[60:110] += 1
        y_col[70:140] += 1
    elif option == 'd':
        data[0:50, 0:70] = np.random.rand(50, 70) * noise + centers[0]
        y_row[0:50] += 1
        y_col[0:70] += 1
        data[50:100, 50:100] = np.random.rand(50, 50) * noise + centers[2]
        y_row[50:100] += 2
        y_col[50:100] += 2
        data[100:150, 80:150] = np.random.rand(50, 70) * noise + centers[1]
        y_row[100:150] += 3
        y_col[80:150] += 3
    elif option == 'e':
        data[0:70, 0:50] = np.random.rand(70, 50) * noise + centers[3]
        y_row[0:70] += 1
        y_col[0:50] += 1
        data[50:100, 50:100] = np.random.rand(50, 50) * noise + centers[1]
        y_row[50:100] += 2
        y_col[50:100] += 2
        data[80:150, 100:150] = np.random.rand(70, 50) * noise + centers[2]
        y_row[80:150] += 3
        y_col[100:150] += 3
    elif option == 'f':
        data[0:50, 0:40] = np.random.rand(50, 40) * noise + centers[4]
        y_row[0:50] += 1
        y_col[0:40] += 1
        data[50:150, 0:40] = np.random.rand(100, 40) * noise + centers[0]
        y_row[50:150] += 2
        data[110:150, 40:95] = np.random.rand(40, 55) * noise + centers[2]
        y_row[110:150] += 3
        y_col[40:95] += 2
        data[110:150, 95:150] = np.random.rand(40, 55) * noise + centers[1]
        y_col[95:150] += 3
    elif option == 'g':
        # checkerboard-like layout; labels are not tracked for this option
        data[0:110, 0:40] = np.random.rand(110, 40) * noise + centers[0]
        data[110:150, 0:110] = np.random.rand(40, 110) * noise + centers[2]
        data[40:150, 110:150] = np.random.rand(110, 40) * noise + centers[1]
        data[0:40, 40:150] = np.random.rand(40, 110) * noise + centers[3]
    elif option == 'h':
        # overlapping biclusters; labels are not tracked for this option
        data[0:90, 0:90] = np.random.rand(90, 90) * noise + centers[0]
        data[35:55, 35:55] += np.random.rand(20, 20) * noise + centers[1]
        data[110:140, 35:90] = np.random.rand(30, 55) * noise + centers[4]
        data[0:140, 110:150] = np.random.rand(140, 40) * noise + centers[2]
        data[0:55, 130:150] += np.random.rand(55, 20) * noise + centers[3]
    elif option == 'i':
        # labels are not tracked for this option
        data[20:70, 20:70] = np.random.rand(50, 50) * noise + centers[0]
        data[20:70, 100:150] = np.random.rand(50, 50) * noise + centers[1]
        data[50:110, 50:120] = np.random.rand(60, 70) * noise + centers[2]
        data[120:150, 20:100] = np.random.rand(30, 80) * noise + centers[3]
    if shuffle:
        # shuffles rows and columns in place; note y_row and y_col are not permuted to match
        np.random.shuffle(data)
        np.random.shuffle(data.T)
    return data, y_row, y_col
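A quick sanity check of the generator (a minimal sketch; the dataset option and noise level are arbitrary choices):
In [ ]:
# Minimal sketch: inspect the shapes and label values produced by generate_dataset.
X_demo, y_row_demo, y_col_demo = generate_dataset(option='d', noise=1.0, noise_background=False)
print(X_demo.shape)           # (150, 150)
print(np.unique(y_row_demo))  # one label per row-cluster region
print(np.unique(y_col_demo))  # one label per column-cluster region
plt.matshow(X_demo, cmap=plt.cm.Blues)
plt.show()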
In [59]:
%%latex
This is a coclustering algorithm called Block Value Decomposition (BVD), based on the Nonnegative Matrix Factorization (NMF)
technique.
The goal is to find a factorization of the data matrix $X \in \mathbb{R}_{+}^{n \times m}$, where $n$ is the number of objects
and $m$ is the number of features of these objects. The factorization takes the form $X \approx USV^T$, where
$U \in \mathbb{R}_{+}^{n \times k}$ is a matrix of row factors representing the row clusters,
$S \in \mathbb{R}_{+}^{k \times l}$ is a block value matrix representing how the blocks are related, and
$V \in \mathbb{R}_{+}^{m \times l}$ is a matrix of column factors representing the column clusters.
In [60]:
%%latex
This algorithm solves the following optimization problem:
$$\textit{min } ||X - USV^T||^2 \textit{ s.t. } U \geq 0, S \geq 0, V \geq 0$$
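Concretely, the objective is the squared Frobenius norm of the reconstruction error. A minimal sketch with random nonnegative factors (the sizes are arbitrary):
In [ ]:
# Minimal sketch: evaluate the BVD objective for random nonnegative factors.
n, m, k, l = 6, 4, 2, 3
X_toy = np.random.rand(n, m)
U_toy = np.random.rand(n, k)
S_toy = np.random.rand(k, l)
V_toy = np.random.rand(m, l)
residual = X_toy - U_toy.dot(S_toy).dot(V_toy.T)
objective = np.sum(residual ** 2)  # equals np.linalg.norm(residual, 'fro') ** 2
print(objective)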
In [61]:
%%latex
The optimization problem can be solved using Lagrange multipliers ($\lambda$), optimizing the following Lagrange function:
$${\cal L} = ||X - USV^T||^2 - tr(\lambda_1U^T) - tr(\lambda_2S^T) - tr(\lambda_3V^T)$$
In [62]:
%%latex
Then ${\cal L}$ must satisfy the K.K.T. conditions:
$$\frac{\partial {\cal L}}{\partial U} = 0$$
$$\frac{\partial {\cal L}}{\partial S} = 0$$
$$\frac{\partial {\cal L}}{\partial V} = 0$$
$$\lambda_1 \odot U = 0$$
$$\lambda_2 \odot S = 0$$
$$\lambda_3 \odot V = 0$$
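In [ ]:
%%latex
To see where the update rules below come from, it helps to write one gradient out explicitly (a sketch for the $U$ case; the $S$ and $V$ cases are analogous):
$$\frac{\partial {\cal L}}{\partial U} = -2XVS^T + 2USV^TVS^T - \lambda_1 = 0$$
Taking the elementwise product with $U$ and using the condition $\lambda_1 \odot U = 0$ eliminates the multiplier:
$$(XVS^T) \odot U = (USV^TVS^T) \odot U$$
The multiplicative update for $U$ is a fixed-point iteration for this equation.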
In [63]:
%%latex
Setting these derivatives to zero, the optimization problem can be solved with the following multiplicative
update rules:
$$U \gets U \odot \frac{XVS^T}{USV^TVS^T}$$
$$V \gets V \odot \frac{X^TUS}{VS^TU^TUS}$$
$$S \gets S \odot \frac{U^TXV}{U^TUSV^TV}$$
In [64]:
def nmtf(X, k, l, num_iter=1):
    """
    Performs nonnegative block value decomposition

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        The data matrix to factorize.
    k : int
        number of row clusters
    l : int
        number of column clusters
    num_iter : int, optional
        number of iterations of the multiplicative updates

    Returns
    -------
    U : array of shape (n_samples, k)
        matrix of row factors representing the row clusters
    S : array of shape (k, l)
        block value matrix representing how the blocks are related
    V : array of shape (l, n_features)
        matrix of column factors representing the column clusters,
        stored transposed, so the reconstruction is U.dot(S).dot(V)
    """
    def scale(A):
        return (A - A.min()) / (A.max() - A.min())
    n, m = X.shape
    U = np.random.rand(n, k)
    S = np.random.rand(k, l)
    V = np.random.rand(l, m)
    X_norm = X  # scaling disabled; use scale(X) to normalize to [0, 1]
    for i in range(num_iter):
        U_delta = (X_norm.dot(V.T).dot(S.T)) / (U.dot(S).dot(V).dot(V.T).dot(S.T))
        U_new = np.nan_to_num(np.multiply(U, U_delta))
        V_delta = (S.T.dot(U.T).dot(X_norm)) / (S.T.dot(U.T).dot(U).dot(S).dot(V))
        V_new = np.nan_to_num(np.multiply(V, V_delta))
        S_delta = (U.T.dot(X_norm).dot(V.T)) / (U.T.dot(U).dot(S).dot(V).dot(V.T))
        S_new = np.nan_to_num(np.multiply(S, S_delta))
        error = np.sum((X_norm - U_new.dot(S_new).dot(V_new)) ** 2)  # reconstruction error (not used further)
        U = U_new
        V = V_new
        S = S_new
    # normalization
    # U_diag = np.diag(np.diag(np.ones(n).dot(U)))
    # V_diag = np.diag(np.diag(np.ones(m).dot(U.T).T))
    # U = np.multiply(U, np.diag(S.dot(V_diag)))
    # V = np.multiply(V, np.diag(U_diag.dot(S)))
    return U, S, V
In [1]:
%%latex
In this case, the goal is to optimize the following problem:
$$\textit{min } ||X - USV^T||^2 \textit{ s.t. } U \in \Psi^{n \times k}, S \in \mathbb{R}_{+}^{k \times l}, V \in \Psi^{m \times l}$$
where $U$ and $V$ become cluster indicator matrices, with row vectors $\vec{u_i}$ and $\vec{v_j}$ that contain a $1$ in
exactly one position, indicating the cluster to which that row or column belongs, and $0$s everywhere else.
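As a concrete illustration (a minimal sketch with an arbitrary label vector), a cluster indicator matrix can be built like this:
In [ ]:
# Minimal sketch: build a cluster indicator matrix from a label vector.
labels = np.array([0, 2, 1, 0, 2])  # 5 rows assigned to 3 clusters
U_ind = np.eye(3)[labels]           # each row contains a single 1 marking its cluster
print(U_ind)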
In [23]:
%%latex
Unlike the previous algorithm, this one updates $S$ in closed form (a least-squares solution) and updates $U$ and $V$
by solving the following assignment subproblems:
$$S \gets (U^TU)^{-1}U^TXV(V^TV)^{-1}$$
$$v_{ij} = \left\{
\begin{array}{ll}
1 & j = \textit{argmin}_c ||\vec{x}_{\cdot i} - \vec{\tilde{u}}_{\cdot c}||^2 \\
0 & \textit{otherwise}
\end{array}
\right.$$
$$u_{ij} = \left\{
\begin{array}{ll}
1 & j = \textit{argmin}_c ||\vec{x}_{i \cdot} - \vec{\tilde{v}}_{c \cdot}||^2 \\
0 & \textit{otherwise}
\end{array}
\right.$$
where $\tilde{U} = US$ (whose columns are column-cluster prototypes) and $\tilde{V} = SV^T$ (whose rows are row-cluster prototypes).
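The assignment subproblems can also be solved without explicit loops. A minimal vectorized sketch of the column-assignment step (toy sizes, random indicators):
In [ ]:
# Minimal sketch: vectorized column assignment, with X (n, m), U (n, k), S (k, l).
n, m, k, l = 8, 6, 2, 3
X_toy = np.random.rand(n, m)
U_toy = np.eye(k)[np.random.randint(k, size=n)]  # random row indicator matrix
S_toy = np.random.rand(k, l)
U_tilde = U_toy.dot(S_toy)  # (n, l): each column is a column-cluster prototype
# squared distance from every column of X to every prototype column
D = ((X_toy[:, :, None] - U_tilde[:, None, :]) ** 2).sum(axis=0)  # (m, l)
V_toy = np.eye(l)[D.argmin(axis=1)]  # (m, l) column indicator matrix
print(V_toy)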
In [ ]:
def fnmtf(X, k, l, num_iter=1):
    def scale(A):
        return (A - A.min()) / (A.max() - A.min())
    n, m = X.shape
    U = np.random.rand(n, k)
    S = np.random.rand(k, l)
    V = np.random.rand(m, l)
    X_norm = X  # scaling disabled; use scale(X) to normalize to [0, 1]
    for it in range(num_iter):
        # closed-form least-squares update for S
        S = np.linalg.pinv(U.T.dot(U)).dot(U.T).dot(X_norm).dot(V).dot(np.linalg.pinv(V.T.dot(V)))
        # solve the assignment subproblem to update V (assign columns to column clusters)
        U_tilde = U.dot(S)  # (n, l): columns are column-cluster prototypes
        V_new = np.zeros((m, l))
        for i in range(m):
            subproblem_result = np.zeros(l)
            for j in range(l):
                subproblem_result[j] = np.linalg.norm(X_norm[:, i] - U_tilde[:, j])
            V_new[i][np.argmin(subproblem_result)] = 1
        # solve the assignment subproblem to update U (assign rows to row clusters)
        V_tilde = S.dot(V.T)  # (k, m): rows are row-cluster prototypes
        U_new = np.zeros((n, k))
        for j in range(n):
            subproblem_result = np.zeros(k)
            for i in range(k):
                subproblem_result[i] = np.linalg.norm(X_norm[j, :] - V_tilde[i, :])
            U_new[j][np.argmin(subproblem_result)] = 1
        U = U_new
        V = V_new
    # normalization
    # U_diag = np.diag(np.diag(np.ones(n).dot(U)))
    # V_diag = np.diag(np.diag(np.ones(m).dot(U.T).T))
    # U = np.multiply(U, np.diag(S.dot(V_diag)))
    # V = np.multiply(V, np.diag(U_diag.dot(S)))
    return U, S, V
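Since fnmtf is not exercised in the cells below, here is a quick smoke test on one of the synthetic datasets (a minimal sketch; the cluster counts are an arbitrary choice):
In [ ]:
# Minimal sketch: run fnmtf on a synthetic dataset and inspect the hard assignments.
X_test, _, _ = generate_dataset(option='d', noise=1.0, noise_background=False)
U_hard, S_hard, V_hard = fnmtf(X_test, k=4, l=4, num_iter=5)
print(U_hard.sum(axis=0))  # number of rows assigned to each row cluster
print(V_hard.sum(axis=0))  # number of columns assigned to each column cluster
plt.matshow(U_hard.dot(S_hard).dot(V_hard.T), cmap=plt.cm.Blues)
plt.show()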
In [74]:
X, _, _ = generate_dataset(option='a', noise=1.0, noise_background=False)
plt.matshow(X, cmap=plt.cm.Blues)
plt.title("Original Data")
plt.show()
U, S, V = nmtf(X,2,2,1)
plt.matshow(U, cmap=plt.cm.Blues)
plt.matshow(S, cmap=plt.cm.Blues)
plt.matshow(V, cmap=plt.cm.Blues)
plt.matshow(U.dot(S).dot(V), cmap=plt.cm.Blues)
Out[74]:
In [22]:
X = np.zeros((10, 10))
X[3:8, 3:8] += 20
plt.matshow(X, cmap=plt.cm.Blues)
plt.title("Original Data")
plt.show()
U, S, V = nmtf(X,2,2,1)
plt.matshow(U, cmap=plt.cm.Blues)
plt.matshow(S, cmap=plt.cm.Blues)
plt.matshow(V, cmap=plt.cm.Blues)
plt.matshow(U.dot(S).dot(V), cmap=plt.cm.Blues)
Out[22]:
In [1]:
X = np.zeros((10, 10))
X[0:4, 0:4] += 20
X[2:8, 4:6] += 40
X[6:10, 6:10] += 60
plt.matshow(X, cmap=plt.cm.Blues)
plt.title("Original Data")
plt.show()
U, S, V = nmtf(X,4,4,9)
plt.matshow(U, cmap=plt.cm.Blues)
plt.matshow(S, cmap=plt.cm.Blues)
plt.matshow(V, cmap=plt.cm.Blues)
plt.matshow(U.dot(S).dot(V), cmap=plt.cm.Blues)
In [14]:
X, _, _ = generate_dataset(option='g', noise=1.0, noise_background=False, shuffle=False)
plt.matshow(X, cmap=plt.cm.Blues)
plt.title("Original Data")
plt.show()
U, S, V = nmtf(X,5,5,num_iter=1)
plt.matshow(U, cmap=plt.cm.Blues)
plt.matshow(S, cmap=plt.cm.Blues)
plt.matshow(V, cmap=plt.cm.Blues)
plt.matshow(U.dot(S).dot(V), cmap=plt.cm.Blues)
Out[14]:
In [ ]: