NumPy

NumPy is a Python library that provides the ndarray (a multi-dimensional array structure) together with a wide range of functions for operating on ndarrays and performing mathematical computations. See the NumPy documentation (https://numpy.org/doc/) for details.


In [1]:
import numpy as np

The ndarray structure

The ndarray data structure is central to NumPy. It is an n-dimensional array whose elements all share a single data type, which is described by the array's dtype.


In [2]:
my_vector = np.array([1, 2, 3, 4])
my_vector


Out[2]:
array([1, 2, 3, 4])

In [3]:
my_vector.shape


Out[3]:
(4,)

In [4]:
# dtype reports the element type. The default integer type depends on the
# platform and NumPy version, so this may show int64 instead of int32.
my_vector.dtype


Out[4]:
dtype('int32')

In [5]:
my_matrix = np.array([[1, 2], [3, 4]])
my_matrix


Out[5]:
array([[1, 2],
       [3, 4]])

In [6]:
my_matrix.shape


Out[6]:
(2, 2)

In [7]:
# Find the size of a single element in bytes (not the size of the whole array)
my_matrix.itemsize


Out[7]:
4

In [8]:
# An explicit dtype controls the element size: int8 stores each value in 1 byte
my_matrix2 = np.array([[1, 2], [3, 4]], dtype=np.int8)
my_matrix2.itemsize


Out[8]:
1

Array creation methods


In [9]:
# Create an uninitialised array of the specified shape and dtype.
# np.empty does not set the values: the contents are whatever was already in
# memory, so the zeros shown in the output are incidental and must not be
# relied on.
np.empty(shape=(4,4),dtype=np.int8)


Out[9]:
array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]], dtype=int8)

In [10]:
# zeros() produces floating-point values unless a dtype is given
np.zeros(4)


Out[10]:
array([ 0.,  0.,  0.,  0.])

In [11]:
np.zeros((4,4))


Out[11]:
array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

In [12]:
# Build a 4x4 array filled with 42 in one step. np.full avoids allocating a
# zero array and then adding to it; the float fill value keeps the float64
# result that zeros() + 42 produced.
np.full((4, 4), 42.0)


Out[12]:
array([[ 42.,  42.,  42.,  42.],
       [ 42.,  42.,  42.,  42.],
       [ 42.,  42.,  42.,  42.],
       [ 42.,  42.,  42.,  42.]])

In [13]:
# Create a new zero matrix with the same shape and dtype as another array
# (my_matrix holds integers, so the zeros are integers too).
np.zeros_like(my_matrix)


Out[13]:
array([[0, 0],
       [0, 0]])

In [14]:
np.ones(4)


Out[14]:
array([ 1.,  1.,  1.,  1.])

In [15]:
np.ones((4,4))


Out[15]:
array([[ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.]])

In [16]:
# Similar to Python's built-in range() function: the stop value is excluded
np.arange(start=0, stop=10, step=2)


Out[16]:
array([0, 2, 4, 6, 8])

In [17]:
# Like arange(), but instead of a step size we specify the
# number of values that we need. It generates linearly spaced
# numbers over the given interval; unlike arange(), the stop value is included.
np.linspace(start=10, stop=20, num=5)


Out[17]:
array([ 10. ,  12.5,  15. ,  17.5,  20. ])

In [18]:
# Generate numbers that are evenly spaced on a logarithmic scale.
# start and stop are exponents of the base (10 by default), so this
# spans 10**1 to 10**2.
np.logspace(start=1, stop=2, num=10)


Out[18]:
array([  10.        ,   12.91549665,   16.68100537,   21.5443469 ,
         27.82559402,   35.93813664,   46.41588834,   59.94842503,
         77.42636827,  100.        ])

Aggregate methods (min and max)


In [19]:
arr1 = np.array([10, 87, 86,  5,  4, 38, 94, 76, 12, 17])
arr1


Out[19]:
array([10, 87, 86,  5,  4, 38, 94, 76, 12, 17])

In [20]:
# Largest value and the index at which it occurs
arr1.max(), arr1.argmax()


Out[20]:
(94, 6)

In [21]:
# Smallest value and the index at which it occurs
arr1.min(), arr1.argmin()


Out[21]:
(4, 4)

In [22]:
# copy() returns a new array with its own data, so later changes to
# arr1_copy do not affect arr1.
arr1_copy = arr1.copy()
arr1_copy


Out[22]:
array([10, 87, 86,  5,  4, 38, 94, 76, 12, 17])

Summations


In [23]:
# 5x5 matrix holding the integers 1..25, filled row by row
matrix1 = np.arange(start=1, stop=26).reshape((5, 5))
matrix1


Out[23]:
array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25]])

In [24]:
# Sum values in the matrix
matrix1.sum()


Out[24]:
325

In [25]:
# Column totals: summing along axis 0 collapses the rows,
# leaving one value per column
matrix1.sum(axis=0)


Out[25]:
array([55, 60, 65, 70, 75])

Transform a 1D array into a 2D array


In [26]:
# The 25 prime numbers below 100, as a one-dimensional array
prime_numbers = np.array([
    2, 3, 5, 7, 11,
    13, 17, 19, 23, 29,
    31, 37, 41, 43, 47,
    53, 59, 61, 67, 71,
    73, 79, 83, 89, 97,
])
prime_numbers


Out[26]:
array([ 2,  3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59,
       61, 67, 71, 73, 79, 83, 89, 97])

In [27]:
# reshape returns the same 25 values arranged as 5 rows by 5 columns;
# the shape of prime_numbers itself is left unchanged.
prime_numbers.reshape(5, 5)


Out[27]:
array([[ 2,  3,  5,  7, 11],
       [13, 17, 19, 23, 29],
       [31, 37, 41, 43, 47],
       [53, 59, 61, 67, 71],
       [73, 79, 83, 89, 97]])

Identity Matrix


In [28]:
# 4x4 identity matrix: ones on the main diagonal, zeros elsewhere
np.eye(4)


Out[28]:
array([[ 1.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  1.]])

Random Numbers


In [29]:
# Use NumPy to generate four random numbers drawn uniformly from [0, 1).
# No seed is set in this notebook, so the values change on every run; call
# np.random.seed(...) first if reproducible output is needed.
np.random.rand(4)


Out[29]:
array([ 0.17778166,  0.42168284,  0.15041776,  0.19598256])

In [30]:
np.random.rand(4, 4)


Out[30]:
array([[ 0.41711986,  0.28553897,  0.15126467,  0.69702041],
       [ 0.94215856,  0.15776642,  0.47677554,  0.49347121],
       [ 0.62380378,  0.67516128,  0.85931787,  0.61527131],
       [ 0.12670923,  0.20953841,  0.36805257,  0.92279489]])

In [31]:
# A single random integer from 1 to 100; the upper bound (101) is exclusive
np.random.randint(1, 101)


Out[31]:
95

In [32]:
# Generate 10 random integers between 1 and 100 inclusive
# (the third argument is the number of values to draw)
np.random.randint(1, 101, 10)


Out[32]:
array([75, 73, 47, 54, 40, 28,  9, 18, 49,  9])

Sample from the normal distribution


In [33]:
# Generate four samples from the standard normal distribution
# (mean 0, standard deviation 1)
np.random.randn(4)


Out[33]:
array([ 0.35631723,  0.56252737, -0.9428327 , -0.81083621])

In [34]:
np.random.randn(4, 4)


Out[34]:
array([[ 0.09688495,  0.64863512,  0.42222238, -2.24968603],
       [ 1.2411619 ,  0.71394497,  1.76795526,  1.13842331],
       [ 0.55752422, -1.96477993, -2.51719341, -1.09681019],
       [ 0.97596094, -0.00330034,  0.0134228 , -0.06362289]])

Indexing and slicing


In [35]:
# Multiples of 10 from 0 to 100 (stop=101 so that 100 itself is included)
arr2 = np.arange(start=0, stop=101, step=10)
arr2


Out[35]:
array([  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100])

In [36]:
arr2[2]


Out[36]:
20

In [37]:
# Use Python's slice notation to fetch elements from the array;
# the start index is included and the stop index is excluded.
arr2[3:6]


Out[37]:
array([30, 40, 50])

In [38]:
arr2[3:]


Out[38]:
array([ 30,  40,  50,  60,  70,  80,  90, 100])

In [39]:
arr2[:4]


Out[39]:
array([ 0, 10, 20, 30])

In [40]:
# Boolean indexing: keep only the elements for which the condition is True
arr2[arr2 > 5]


Out[40]:
array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100])

In [41]:
# The comparison on its own yields a boolean array with one True/False per
# element; this is the mask that boolean indexing uses.
arr2 > 5


Out[41]:
array([False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True], dtype=bool)

In [42]:
# np.nan is a float, so the whole array (including the integers) is stored as floats
arr_with_nans = np.array([np.nan, 1,2,np.nan,3,4,5])
arr_with_nans


Out[42]:
array([ nan,   1.,   2.,  nan,   3.,   4.,   5.])

In [43]:
# Get an array where NaN elements are omitted: np.isnan marks the NaN
# positions and ~ inverts that mask so only the non-NaN values are selected.
arr_with_nans[~np.isnan(arr_with_nans)]


Out[43]:
array([ 1.,  2.,  3.,  4.,  5.])

In [44]:
# 5x5 matrix of the integers 1..25, used for the 2-D indexing examples
matrix2 = np.arange(start=1, stop=5 * 5 + 1).reshape((5, 5))
matrix2


Out[44]:
array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25]])

In [45]:
matrix2[1]


Out[45]:
array([ 6,  7,  8,  9, 10])

In [46]:
matrix2[1,2] # row 1, column 2; same as matrix2[1][2]


Out[46]:
8

In [47]:
# Slice rows 1-3 and columns 1-3 to get the inner 3x3 block
matrix2[1:4,1:4]


Out[47]:
array([[ 7,  8,  9],
       [12, 13, 14],
       [17, 18, 19]])

In [48]:
# Use ellipsis to get elements from the third column; for this 2-D array
# it is equivalent to matrix2[:, 2]
matrix2[...,2]


Out[48]:
array([ 3,  8, 13, 18, 23])

In [49]:
# Fetch elements placed at corners of the 5x5 array.
# rows and cols are index arrays of the same shape; the result has that shape
# and its element [i, j] is matrix2[rows[i, j], cols[i, j]].
rows = np.array([[0,0],[4,4]])
cols = np.array([[0,4],[0,4]])
matrix2[rows, cols]


Out[49]:
array([[ 1,  5],
       [21, 25]])

Tiling

Tiling is a common and useful operation for extending an array so that its shape matches that of another NumPy array. The tiled array can then be used in element-wise operations with the other array.


In [50]:
arr3 = np.array([9, 4, 4])
arr3


Out[50]:
array([9, 4, 4])

In [51]:
# Repeat arr3 four times down the rows and once across the columns,
# giving a 4x3 array whose rows are copies of arr3
np.tile(arr3, (4, 1))


Out[51]:
array([[9, 4, 4],
       [9, 4, 4],
       [9, 4, 4],
       [9, 4, 4]])

In [52]:
# Repeat arr3 five times down the rows and twice across the columns (5x6 result)
np.tile(arr3, (5, 2))


Out[52]:
array([[9, 4, 4, 9, 4, 4],
       [9, 4, 4, 9, 4, 4],
       [9, 4, 4, 9, 4, 4],
       [9, 4, 4, 9, 4, 4],
       [9, 4, 4, 9, 4, 4]])

Broadcasting

Broadcasting is NumPy's term for performing arithmetic between arrays of different shapes. If the shapes are compatible, the smaller array is broadcast (virtually stretched) to the shape of the larger one so that element-wise operations can be performed. See the broadcasting section of the NumPy user guide for the exact rules.


In [53]:
# macro_nutrients has shape (4, 3); calories_per_macro has shape (3,).
# Broadcasting stretches the 1-D array across all four rows, so every column
# is scaled by its matching factor without an explicit tile or loop.
macro_nutrients = np.array([
    [0.3, 2.5, 3.5],
    [2.9, 27.5, 0.0],
    [0.4, 1.3, 23.9],
    [14.4, 6.0, 2.3],
])
calories_per_macro = np.array([9, 4, 4])
macro_nutrients * calories_per_macro


Out[53]:
array([[   2.7,   10. ,   14. ],
       [  26.1,  110. ,    0. ],
       [   3.6,    5.2,   95.6],
       [ 129.6,   24. ,    9.2]])

In [54]:
# The integers 0..9; with a single argument arange() starts at 0
arr4 = np.arange(10)
arr4


Out[54]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [55]:
# Assigning a scalar to a slice broadcasts it to every selected element.
# Note that this modifies arr4 in place.
arr4[0:4] = 10
arr4


Out[55]:
array([10, 10, 10, 10,  4,  5,  6,  7,  8,  9])

Arithmetic


In [56]:
# Two equal-length operands for the arithmetic examples: 0..9 and 10..19
arr5 = np.arange(10)
arr6 = np.arange(start=10, stop=20)

In [57]:
arr5


Out[57]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [58]:
arr6


Out[58]:
array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [59]:
arr5 + arr6


Out[59]:
array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28])

In [60]:
arr6 - arr5


Out[60]:
array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10])

In [61]:
arr5 * arr6


Out[61]:
array([  0,  11,  24,  39,  56,  75,  96, 119, 144, 171])

In [62]:
arr5 + 10 # broadcasting: the scalar is added to every element


Out[62]:
array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [63]:
arr6 - 10


Out[63]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [64]:
arr5 ** 2


Out[64]:
array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])

In [65]:
# NumPy generates a warning instead of raising when a division is undefined.
# The first element here is 0 / 0, which triggers an "invalid value"
# RuntimeWarning and is stored as nan.
arr5 / arr5


C:\Users\omar\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in true_divide
  
Out[65]:
array([ nan,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.])

In [66]:
# The first element is 1 / 0, which triggers a "divide by zero"
# RuntimeWarning and yields inf
1 / arr5


C:\Users\omar\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: RuntimeWarning: divide by zero encountered in true_divide
  """Entry point for launching an IPython kernel.
Out[66]:
array([        inf,  1.        ,  0.5       ,  0.33333333,  0.25      ,
        0.2       ,  0.16666667,  0.14285714,  0.125     ,  0.11111111])

Universal functions

A universal function (ufunc) is a mathematical function that operates on arrays in an element-by-element fashion supporting array broadcasting, type casting, and several other standard features. For more information read the ufunc documentation.


In [67]:
arr7 = np.array([2, 6, 7, 10, 45, 200])
arr7


Out[67]:
array([  2,   6,   7,  10,  45, 200])

In [68]:
# Computes the square root of each element of the array
np.sqrt(arr7)


Out[68]:
array([  1.41421356,   2.44948974,   2.64575131,   3.16227766,
         6.70820393,  14.14213562])

In [69]:
# Computes e raised to the power of each element
np.exp(arr7)


Out[69]:
array([  7.38905610e+00,   4.03428793e+02,   1.09663316e+03,
         2.20264658e+04,   3.49342711e+19,   7.22597377e+86])

In [70]:
# Computes the natural logarithm (base e) of each element
np.log(arr7)


Out[70]:
array([ 0.69314718,  1.79175947,  1.94591015,  2.30258509,  3.80666249,
        5.29831737])