In [1]:
import time
print('Last updated: %s' %time.strftime('%d/%m/%Y'))
There are multiple ways in NumPy to extract particular rows or columns of interest from a NumPy array.
For example, given a 3x3-dimensional matrix A as shown below,
In [2]:
import numpy as np
A = np.array([[1,2,3],[4,5,6], [7,8,9]])
print(A)
we now want to extract the first and the last column. The most intuitive way would probably be by direct indexing:
In [3]:
print(A[:,[0,-1]])
But alternatively, we could also have used the numpy.take
function here:
In [4]:
print(np.take(A, indices=[0,-1], axis=1))
Equivalently, if we wanted to extract the first and last row:
In [5]:
print(A[[0,-1],:])
In [6]:
print(np.take(A, indices=[0,-1], axis=0))
Both approaches return
a new array object, not just a view/reference to an existing array, which will be demonstrated in the following example:
In [7]:
print('A before:\n', A)
B = A[[0,-1],:]
B[0,0] = 100
C = np.take(A, indices=[0,-1], axis=0)
C[0,0] = 150
print('\nB:\n', B)
print('\nC:\n', C)
print('\nA after:\n', A)
Note
C = np.take(A, indices=[0,-1], axis=0)
is equivalent to
np.take(A, indices=[0,-1], axis=0, out=C)
However, the former approach is the slightly faster one:
In [8]:
%timeit C = np.take(A, indices=[0,-1], axis=0)
%timeit np.take(A, indices=[0,-1], axis=0, out=C)
In the following benchmark, we will see how fast the two approaches are in returning the first column/row, a random column/row, and the last column/row of 2-dimensional arrays (in terms of their axes) consisting NxN elements.
In [11]:
import timeit
from numpy import take as np_take
orders_n = [10**i for i in range(1, 5)]
idx_slice_col, idx_slice_row, np_take_col, np_take_row = [], [], [], []
for n in orders_n:
rand_idx = np.random.randint(n)
A = np.random.randn(n, n)
idx_slice_col.append(min(timeit.Timer('A[:, [0, rand_idx, -1]]',
'from __main__ import A, rand_idx').repeat(repeat=3, number=1)))
np_take_col.append(min(timeit.Timer('np_take(A, indices=[0, rand_idx, -1], axis=1)',
'from __main__ import A, rand_idx, np_take').repeat(repeat=3, number=1)))
idx_slice_row.append(min(timeit.Timer('A[[0, rand_idx, -1], :]',
'from __main__ import A, rand_idx').repeat(repeat=3, number=1)))
np_take_row.append(min(timeit.Timer('np_take(A, indices=[0, rand_idx, -1], axis=0)',
'from __main__ import A, rand_idx, np_take').repeat(repeat=3, number=1)))
In [12]:
import platform
import multiprocessing
def print_sysinfo():
print('\nPython version :', platform.python_version())
print('compiler :', platform.python_compiler())
print('Numpy version :', np.__version__)
print('\nsystem :', platform.system())
print('release :', platform.release())
print('machine :', platform.machine())
print('processor :', platform.processor())
print('CPU count :', multiprocessing.cpu_count())
print('interpreter:', platform.architecture()[0])
print('\n\n')
In [13]:
%matplotlib inline
In [18]:
import matplotlib.pyplot as plt
def plot():
f, ax = plt.subplots(1, 2, figsize=(15,5))
ax[0].plot(orders_n, idx_slice_col, alpha=0.4, lw=3, marker='o',
label='Classic indexing\nA[:, [0, rand_idx, -1]]')
ax[0].plot(orders_n, np_take_col, alpha=0.4, lw=3, marker='o',
label='NumPy.take\nnp.take(A, indices=[0, rand_idx, -1], axis=1)')
ax[0].set_title('Getting columns from a NumPy array')
ax[1].plot(orders_n, idx_slice_row, alpha=0.4, lw=3, marker='o',
label='Classic indexing\nA[[0, rand_idx, -1], :]')
ax[1].plot(orders_n, np_take_row, alpha=0.4, lw=3, marker='o',
label='NumPy.take\nnp_take(A, indices=[0, rand_idx, -1], axis=0)')
ax[1].set_title('Getting rows from a NumPy array')
for x in ax:
x.legend(loc='upper left')
x.set_ylabel('time in seconds')
x.set_xlabel('NumPy array size')
plt.tight_layout()
plt.show()
In [19]:
plot()
print_sysinfo()