Numpy snippets

This notebook is inspired by the following page: https://numpy.org/doc/stable/user/numpy-for-matlab-users.html

Import directives


In [ ]:
%matplotlib inline
#%matplotlib notebook

from IPython.display import display

In [ ]:
import numpy as np
import math

Create arrays


In [ ]:
np.array([1, 2, 3])

In [ ]:
np.array([[1, 2, 3],[4, 5, 6]])

Special matrices


In [ ]:
np.zeros(3)

In [ ]:
np.zeros((3, 4))

In [ ]:
np.ones(3)

In [ ]:
np.ones((3, 4))

In [ ]:
np.eye(3)

Arange


In [ ]:
np.arange(10)

In [ ]:
np.arange(10, 20)

In [ ]:
np.arange(10, 20, 2)

Linspace


In [ ]:
np.linspace(0., 2., 5)

Meshgrid


In [ ]:
# Coordinate matrices of the grid spanned by x = [1, 2, 3] and y = [4, 5, 6]
# ("xy" is the default indexing, spelled out here for clarity).
xx, yy = np.meshgrid([1, 2, 3], [4, 5, 6], indexing="xy")

# Show both matrices, separated by one blank line.
print(xx, yy, sep="\n\n")

Random

Uniform distribution in [0, 1)


In [ ]:
np.random.rand(3)

In [ ]:
np.random.rand(3, 4)

Poisson distribution


In [ ]:
np.random.poisson(10, size=[3, 4])

Multivariate normal distribution


In [ ]:
# Parameters of a 2-D Gaussian: zero mean, unit variances, covariance 0.3.
mu = np.zeros(2)
cov = np.array([[1., 0.3], [0.3, 1.]])
num_points = 10

# Draw num_points samples from that distribution.
np.random.multivariate_normal(mean=mu, cov=cov, size=num_points)

In [ ]:
np.get_printoptions()

In [ ]:
default_threshold = np.get_printoptions()["threshold"]
default_threshold

Arrays with more than default_threshold elements are truncated.


In [ ]:
max_size = math.ceil(math.sqrt(default_threshold))
max_size

In [ ]:
# randint's upper bound is exclusive, so randint(1, ...) could only ever
# return 0 and the array was not random at all. Draw single digits in
# [0, 10) instead. The array is (max_size + 1) x (max_size + 1) so that its
# number of elements exceeds max_size ** 2.
a = np.random.randint(10, size=[max_size + 1, max_size + 1])
a

Print the full array (set threshold to infinity):


In [ ]:
np.set_printoptions(threshold=np.inf)
a

Go back to the default threshold:


In [ ]:
np.set_printoptions(threshold=default_threshold)
a

Dimension and shape


In [ ]:
a = np.array([[1, 2, 3],[4, 5, 6]])

Number of dimensions:


In [ ]:
a.ndim

Number of elements:


In [ ]:
a.size

Number of elements per dimension:


In [ ]:
a.shape

Convert


In [ ]:
l = [[1, 2, 3],[4, 5, 6]]
a = np.array([[1, 2, 3],[4, 5, 6]])

Python list to Numpy array


In [ ]:
np.array(l)

Numpy array to Python list


In [ ]:
a.tolist()

Copy

ndarray.copy()


In [ ]:
a = np.array([[1, 2, 3],[4, 5, 6]])
a

In [ ]:
b = a.copy()
b

In [ ]:
a[0,0] = 10
print(a)
print(b)

ndarray.astype()


In [ ]:
a = np.array([[1, 2, 3],[4, 5, 6]])
a

In [ ]:
b = a.astype('float64', copy=True)
b

In [ ]:
a[0,0] = 10
print(a)
print(b)

Access elements


In [ ]:
a = np.arange(6)

In [ ]:
a

In [ ]:
a[0]

In [ ]:
a[-1]

Slices


In [ ]:
a[1:4]

In [ ]:
a = np.array([[1, 2, 3, 4, 5, 6],
              [10, 20, 30, 40, 50, 60],
              [100, 200, 300, 400, 500, 600]])
a

In [ ]:
a[0,1]

In [ ]:
a[1, :]

In [ ]:
a[1, ::2]

In [ ]:
a[:, 1]

In [ ]:
a[0:2, 2:4]

In [ ]:
a[1:, 1:]

In [ ]:
a[:-1, :-1]

Ellipsis

"The ellipsis is used to slice high-dimensional data structures.

It's designed to mean at this point, insert as many full slices (:) to extend the multi-dimensional slice to all dimensions."

https://stackoverflow.com/questions/118370/how-do-you-use-the-ellipsis-slicing-syntax-in-python


In [ ]:
a = np.arange(2**3).reshape(2, 2, 2)
a

To select all first elements in the last (3rd) dimension


In [ ]:
a[..., 0]

is equivalent to


In [ ]:
a[:, :, 0]

To select all first elements in the first (1st) dimension


In [ ]:
a[0, ...]

is equivalent to


In [ ]:
a[0, :, :]

Filter


In [ ]:
a = np.array([[1, 2, 3, 4, 5, 6],
              [10, 20, 30, 40, 50, 60],
              [100, 200, 300, 400, 500, 600]])
a

Boolean matrix whose i,jth element is (a_ij > 5)


In [ ]:
(a>5)

Find the indices where (a > 5)


In [ ]:
np.nonzero(a>5)

Copy of a with elements greater than 5 zeroed out (first cell), or the same done in place (second cell)


In [ ]:
a * (a<=5)

In [ ]:
a[a>5] = 0
a

Select indices satisfying multiple conditions

Short version


In [ ]:
a = np.array([[-1, 7, 3], [-11, -5, 20]])
a

In [ ]:
a[(a > -10) & (a < 10)] = 0
a

In [ ]:
a[(a < -10) | (a > 10)] = 1
a

Detailed version


In [ ]:
a = np.array([[-1, 7, 3], [-11, -5, 20]])
a

In [ ]:
# Build the two element-wise conditions separately...
m1 = a > -10
m2 = a < 10

# ...show each mask and their conjunction, one per line...
print(m1, m2, np.logical_and(m1, m2), sep="\n")

# ...then zero every element that satisfies both conditions.
a[np.logical_and(m1, m2)] = 0
a

Concatenate

Append 1D arrays


In [ ]:
# Append a single value to an empty array; the result is bound back to `a`.
a = np.append(np.array([]), 3)
a

Performance test

It's probably not a good idea to use np.append too often as it makes a copy of the array each time it is called...


In [ ]:
%%timeit

a = np.array([])
for i in range(10000):
    a = np.append(a, i)

Lists use a different data structure that makes them more efficient for repeated additions...


In [ ]:
%%timeit

l = []
for i in range(10000):
    l.append(i)

a = np.array(l)

In this case, the better option is probably the following:


In [ ]:
%%timeit

a = np.array([i for i in range(10000)])

Concatenate 1D arrays


In [ ]:
# Two length-3 vectors (all zeros / all ones) used by the cells that follow.
a, b = np.zeros(shape=3), np.ones(shape=3)
print("a:", a)
print("b:", b)

In [ ]:
np.concatenate([a, b])

In [ ]:
np.hstack([a, b])

Concatenate 2D arrays


In [ ]:
a = np.zeros([2, 3])
b = np.ones([2, 3])

In [ ]:
a

In [ ]:
b

On the first dimension

Using vstack:


In [ ]:
np.vstack([a, b])

In [ ]:
np.vstack([a, b]).shape

Using concatenate:


In [ ]:
np.concatenate([a, b], axis=0)

In [ ]:
np.concatenate([a, b], axis=0).shape

On the second dimension

Using hstack:


In [ ]:
np.hstack([a, b])

In [ ]:
np.hstack([a, b]).shape

Using concatenate:


In [ ]:
np.concatenate([a, b], axis=1)

In [ ]:
np.concatenate([a, b], axis=1).shape

Join a sequence of arrays along a new axis

The axis parameter specifies the index of the new axis in the dimensions of the result.


In [ ]:
a = np.zeros([2, 3])
b = np.ones([2, 3])

In [ ]:
a

In [ ]:
b

Along axis 0


In [ ]:
np.stack([a, b], axis=0)

In [ ]:
np.stack([a, b], axis=0).shape

Along axis 1


In [ ]:
np.stack([a, b], axis=1)

In [ ]:
np.stack([a, b], axis=1).shape

Along axis 2


In [ ]:
np.stack([a, b], axis=2)

In [ ]:
np.stack([a, b], axis=2).shape

Tile


In [ ]:
a = np.array([[1, 2, 3], [4, 5, 6]])
np.tile(a, (2, 3))

Reshape or transpose


In [ ]:
a = np.array([[1, 2, 3], [4, 5, 6]])

Transpose


In [ ]:
a.T

Flatten


In [ ]:
a.flatten()

Reshape


In [ ]:
a = np.arange(6)
a

Row vector to column vector


In [ ]:
a.reshape([-1, 1])

Vector to matrix


In [ ]:
a.reshape([2, 3])

In [ ]:
a.reshape([3, 2])

Repeat


In [ ]:
a = np.arange(3)
a

In [ ]:
np.repeat(a, 5)

In [ ]:
a = np.arange(3).reshape([-1, 1])
a

In [ ]:
np.repeat(a, 5, axis=0)

In [ ]:
a = np.array([[1, 3, 5],[2, 4, 6]])
a

In [ ]:
np.repeat(a, 5, axis=0)

Sort

Return the indices that would sort an array


In [ ]:
a = np.array([8, 5, 1])
a

In [ ]:
a.argsort()

Sort an array by the $n^{\text{th}}$ column


In [ ]:
a = np.array([[4, 4, 2],
              [8, 5, 1],
              [7, 0, 0],
              [3, 1, 1],
              [3, 0, 5]])
a

In [ ]:
n = 0   # the column sorted by

In [ ]:
a[a[:,n].argsort()]

In [ ]:
n = 1   # the column sorted by

In [ ]:
a[a[:,n].argsort()]

In [ ]:
n = 2   # the column sorted by

In [ ]:
a[a[:,n].argsort()]

Aggregation / reduction


In [ ]:
a = np.array([[1, 2, 3], [4, 5, 6]])

Change the axis value in the following functions to aggregate along a given axis.


In [ ]:
np.sum(a, axis=None)

In [ ]:
np.cumsum(a, axis=None)

In [ ]:
np.diff(a.ravel())

In [ ]:
np.mean(a, axis=None)

In [ ]:
np.var(a, axis=None)

In [ ]:
np.std(a, axis=None)

In [ ]:
np.median(a, axis=None)

In [ ]:
np.min(a, axis=None)

In [ ]:
np.max(a, axis=None)

In [ ]:
np.prod(a, axis=None)

In [ ]:
np.cumprod(a, axis=None)

Compute the histogram of a set of data (with a specific binning)


In [ ]:
a = np.array([1, 1, 3, 2, 2, 2])
a

All but the last (righthand-most) bin is half-open. In other words, if bins is:

[1, 2, 3, 4]

then the first bin is [1, 2) (including 1, but excluding 2) and the second [2, 3). The last bin, however, is [3, 4], which includes 4.


In [ ]:
bins = np.array([1, 2, 3, 4])
bins

In [ ]:
hist, bins_ = np.histogram(a, bins=bins)
hist

Linear algebra

Dot product of two arrays


In [ ]:
a = np.array([1, 2, 3])
b = np.array([10, 20, 30])

In [ ]:
np.dot(a, b)

In [ ]:
a.dot(b)

Compute the (multiplicative) inverse of a matrix


In [ ]:
a = np.random.normal(size=(3, 3))
a

In [ ]:
np.linalg.inv(a)

Compute the eigenvalues and right eigenvectors of a square array


In [ ]:
a = np.random.normal(size=(3, 3))
a

In [ ]:
np.linalg.eig(a)

Singular Value Decomposition


In [ ]:
a = np.random.normal(size=(3, 3))
a

In [ ]:
# np.linalg.svd returns the third factor already (conjugate-)transposed,
# i.e. a == U @ np.diag(s) @ Vh. Name it Vh rather than V so that nobody
# applies an extra transpose by mistake.
U, s, Vh = np.linalg.svd(a)
print(U, s, Vh)

Solve a linear matrix equation, or system of linear scalar equations


In [ ]:
# Solve the system  3*x0 + x1 = 9,  x0 + 2*x1 = 8,  written as a @ x = b.
a = np.array([[3, 1],
              [1, 2]])
b = np.array([9, 8])
np.linalg.solve(a, b)

Diagonals

Extract the diagonal:


In [ ]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
np.diag(a)

Make a diagonal matrix:


In [ ]:
d = np.array([1, 2, 3])
np.diag(d)

Trace


In [ ]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
np.trace(a)

Upper and lower triangles of an array


In [ ]:
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [ ]:
np.triu(a)

In [ ]:
np.tril(a)

Data types

Get type


In [ ]:
a = np.arange(-2., 2., 0.5)
a.dtype

Size in memory (in bytes)


In [ ]:
a = np.arange(-2., 2., 0.5)

Per item:


In [ ]:
a.itemsize

Full array:


In [ ]:
a.nbytes

Init


In [ ]:
a = np.zeros(3)
a.dtype

In [ ]:
# The np.bool alias was deprecated in NumPy 1.20 and removed in 1.24;
# the builtin bool selects the same boolean dtype on every NumPy version.
a = np.zeros(3, dtype=bool)
a.dtype

In [ ]:
# The np.int alias was deprecated in NumPy 1.20 and removed in 1.24;
# the builtin int selects NumPy's default integer dtype.
a = np.zeros(3, dtype=int)
a.dtype

In [ ]:
a = np.zeros(3, dtype=np.int8)
a.dtype

In [ ]:
a = np.zeros(3, dtype=np.uint8)
a.dtype

Conversions


In [ ]:
a = np.arange(-2., 2., 0.5)
a

In [ ]:
# np.bool was removed in NumPy 1.24; the builtin bool is the portable spelling.
a.astype(bool)

In [ ]:
# np.int was removed in NumPy 1.24; the builtin int is the portable spelling.
a.astype(int)

In [ ]:
a.astype(np.int8)

In [ ]:
a.astype(np.uint8)

Masked arrays

Without masked array


In [ ]:
a = np.array([[np.nan, 2, 3], [1, np.nan, 6]])
a

In [ ]:
a.min()

In [ ]:
np.nanmin(a)

In [ ]:
a.max()

In [ ]:
np.nanmax(a)

In [ ]:
a.mean()

In [ ]:
np.nanmean(a)

In [ ]:
a.shape

With masked array


In [ ]:
ma = np.ma.masked_where(np.isnan(a), a)
ma

In [ ]:
ma.min()

In [ ]:
ma.max()

In [ ]:
ma.mean()

In [ ]:
ma.shape