In [2]:
import numpy as np

Vectorization - replace a Python for-loop of vector multiplications with a single matrix multiplication (see the sketch below).
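A minimal sketch (with hypothetical W and X): each column of the vectorized product equals one loop iteration's matrix-vector product.

import numpy as np

W = np.random.randn(4, 3)
X = np.random.randn(3, 100)

# loop version: one matrix-vector product per column of X
out_loop = np.stack([W.dot(X[:, t]) for t in range(X.shape[1])], axis=1)

# vectorized version: a single matrix multiplication
out_vec = W.dot(X)

assert np.allclose(out_loop, out_vec)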

Axis

Repeat

Repeat a vector into a matrix


In [3]:
v = np.array([1, 2, 3])

In [4]:
np.repeat(v[:, np.newaxis], 3, 1)


Out[4]:
array([[1, 1, 1],
       [2, 2, 2],
       [3, 3, 3]])

In [6]:
np.repeat(v[np.newaxis, :], 3, 0)


Out[6]:
array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3]])
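np.tile produces the same matrices without the explicit newaxis:

np.tile(v[:, np.newaxis], (1, 3))   # same as Out[4]
np.tile(v, (3, 1))                  # same as Out[6]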

Vectorize a Function

np.vectorize wraps a scalar function so that it can be applied directly to a NumPy array.

It is better to specify an explicit output type when vectorizing:

f = np.vectorize(f, otypes=[np.float64])
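A minimal sketch with a hypothetical scalar function:

def clip_diff(a, b):
    """Difference of two scalars, clipped at zero."""
    return a - b if a > b else 0.0

clip_diff = np.vectorize(clip_diff, otypes=[np.float64])
clip_diff(np.array([1, 5, 3]), np.array([2, 2, 2]))
# array([ 0.,  3.,  1.])

np.apply_along_axis, in contrast, maps a function over 1-D slices of an array rather than over scalars: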

In [7]:
def my_func(a):
    """Average first and last element of a 1-D array"""
    return (a[0] + a[-1]) * 0.5
b = np.array([[1,2,3], [4,5,6], [7,8,9]])
np.apply_along_axis(my_func, 0, b)


Out[7]:
array([ 4.,  5.,  6.])
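With axis=1 the same function averages each row instead:

np.apply_along_axis(my_func, 1, b)
# array([ 2.,  5.,  8.])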

Examples

Viterbi Algorithm

def forward(self):
    """
    Forward Viterbi (log domain)
    """
    self._init_l()
    for t in range(1, self.T):
        for j in range(self.I):
            # best previous state i for state j at time t
            i_op = np.argmax(self.l_op[:, t-1] + np.log(self.a[:, j]))
            self.phi[j, t] = i_op
            self.l_op[j, t] = self.l_op[i_op, t-1] + np.log(self.a[i_op, j])
            self.l_op[j, t] += np.log(self.b[j, self.o[t]])

After vectorizing the inner loop (note the transpose of the transition matrix, so that broadcasting adds l_op[:, t-1] along the correct axis):

def forward(self):
    """
    Forward Viterbi (log domain), inner loop vectorized
    """
    self._init_l()
    log_a = np.log(self.a)
    for t in range(1, self.T):
        # scores[j, i] = l_op[i, t-1] + log a[i, j]; reduce over i (axis=1)
        scores = log_a.T + self.l_op[:, t-1]
        self.phi[:, t] = np.argmax(scores, axis=1)
        self.l_op[:, t] = np.max(scores, axis=1)
        self.l_op[:, t] += np.log(self.b[:, self.o[t]])
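A quick check on toy arrays (hypothetical, I = 3 states) that the vectorized step matches the double loop:

import numpy as np

I = 3
a = np.random.dirichlet(np.ones(I), size=I)        # row-stochastic transition matrix
l_prev = np.log(np.random.dirichlet(np.ones(I)))   # log-scores at t-1

# double-loop version: best previous state per target state j
phi_loop = np.array([np.argmax(l_prev + np.log(a[:, j])) for j in range(I)])

# vectorized version: scores[j, i] = log a[i, j] + l_prev[i]
scores = np.log(a).T + l_prev
phi_vec = np.argmax(scores, axis=1)

assert np.array_equal(phi_loop, phi_vec)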

Nearest Neighbor

Squared distances via power and sum, then the closest sample via argmin.

def predict(self, x):
    """
    :param x: the data point vector to be predicted
    """
    # tile x to match the (d, T) shape of the training data
    x_mat = np.repeat(x[:, np.newaxis], self.data.T, 1)
    dist = self.data.X - x_mat
    dist = np.power(dist, 2)    # element-wise squared differences
    dist = np.sum(dist, 0)      # sum over features: squared distance per sample
    idx = np.argmin(dist)       # closest training sample
    return self.data.Y[idx]
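The explicit np.repeat is not strictly needed: broadcasting subtracts x from every column without materializing the copies (a sketch assuming self.data.X has shape (d, T)):

dist = np.sum((self.data.X - x[:, np.newaxis]) ** 2, axis=0)
idx = np.argmin(dist)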

PCA

Center data $$X = X - \mu$$

X = X - np.repeat(mu[:, np.newaxis], T, axis=1)
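Broadcasting again removes the explicit repeat, assuming mu has shape (d,):

X = X - mu[:, np.newaxis]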

Group samples by class: the loop below appends one column at a time; a boolean mask (==) does it in one shot.

for t in range(T):
    y = Y[t]
    x = X[:, t]
    # append column x to the data matrix of class y
    gaussians[y].X = np.c_[gaussians[y].X, x.reshape(self.dim, 1)]
    print(t)

# vectorization: one boolean-mask selection per class
for i, g in enumerate(gaussians):
    g.X = X[:, Y == i]   # all columns whose label is i
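A toy illustration of the mask (hypothetical X and Y):

X = np.arange(8).reshape(2, 4)   # 4 samples with 2 features each, as columns
Y = np.array([0, 1, 0, 1])
X[:, Y == 0]
# array([[0, 2],
#        [4, 6]])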

Prediction

multivariate_normal.pdf (from scipy.stats) is inherently vectorization-friendly: passing a batch of points as rows returns a vector of densities.

def predict(self, X):
    predict = np.zeros((len(self.gaussians), X.shape[1]))
    for i, g in enumerate(self.gaussians):
        predict[i, :] = np.log(
            # class prior: fraction of training samples in this class
            g.X.shape[1] / float(self.T) *
            # pdf expects points as rows, hence the transpose of X;
            # it returns one density per point
            multivariate_normal.pdf(X.T, g.mu, g.smoothed_Sigma)
        )

    return np.argmax(predict, axis=0)

Logistic Regression
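With labels $$y_t \in \{-1, +1\}$$ and $$\sigma$$ the logistic function, the loss implemented below is $$L(w) = -\sum_t \log \sigma(y_t\, w^\top x_t)$$ with gradient $$\nabla_w L = -\sum_t \sigma(-y_t\, w^\top x_t)\, y_t\, x_t.$$ Note that first_derivative returns this sum without the leading minus sign, i.e. the ascent direction on the log-likelihood.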

def first_derivative(self, X, Y, w):
    """
    Calculate the 1st derivative of the log-loss function
    (logistic.cdf is scipy.stats.logistic.cdf)
    """
    d, T = X.shape
    sigma = logistic.cdf(np.multiply(-Y, np.dot(w.T, X)))   # sigma(-y_t w.x_t) per sample
    ret = np.multiply(sigma, Y)
    # tile to (d, T) so it can be multiplied element-wise with X
    ret = np.multiply(np.repeat(ret[np.newaxis, :], d, axis=0), X)
    ret = np.sum(ret, axis=1)   # sum over samples
    return ret

def log_loss(self, X, Y, w):
    L = np.log(logistic.cdf(np.multiply(Y, np.dot(w.T, X))))
    L = np.sum(L)
    return -L
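The np.repeat in first_derivative can itself be replaced by broadcasting; a minimal sketch assuming X has shape (d, T), Y shape (T,) and w shape (d,):

import numpy as np
from scipy.stats import logistic

def first_derivative(X, Y, w):
    """Same sum as above, via broadcasting instead of np.repeat."""
    sigma = logistic.cdf(-Y * np.dot(w.T, X))   # shape (T,)
    return np.sum(sigma * Y * X, axis=1)        # (T,) broadcasts against (d, T)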
