In [2]:
import numpy as np

In [3]:
# Draw 8 random integers from [10, 30): the lower bound is inclusive, the upper bound exclusive.
# NOTE(review): no seed is set in the visible cells, so the values change every time this cell runs.
arr1 = np.random.randint(10,30, size=8)
arr1


Out[3]:
array([12, 21, 19, 14, 16, 20, 24, 21])

In [4]:
arr2 = np.random.randint(20,200,size=50).reshape(5,10)  #method chaining - 50 random integers in [20, 200), reshaped into 5 rows x 10 columns
arr2


Out[4]:
array([[ 94,  59,  25,  97,  71,  86, 178,  56,  43, 142],
       [ 87, 105, 170,  90, 110, 199, 124, 199,  81,  63],
       [158, 164, 138,  66, 144, 158,  81, 116, 132,  78],
       [121, 198,  55, 134, 118,  36,  71,  72,  23, 138],
       [ 53,  48, 106,  98,  81,  66,  34,  64,  31,  38]])

Array slicing

Get elements by index, just as you would with a Python list.


In [8]:
# indexing is 0-based, so this is the first element
arr1[0]


Out[8]:
13

In [9]:
# element at index 3, i.e. the fourth element
arr1[3]


Out[9]:
14

In [11]:
arr1[:3] #get the first 3 elements (indices 0,1,2) - lower bound is inclusive, upper bound is exclusive


Out[11]:
array([13, 10, 12])

In [14]:
arr1[2:] #everything from index 2 (inclusive) to the end


Out[14]:
array([12, 14, 12, 23, 13, 14])

In [15]:
arr1[2:5] #get elements at index 2,3,4 - the upper bound 5 is excluded


Out[15]:
array([12, 14, 12])

nD array slicing


In [16]:
# show arr2 again as the reference for the 2D slicing examples that follow
arr2


Out[16]:
array([[123,  46,  28,  25,  47,  46,  25, 176,  73, 174],
       [152, 105,  87, 137,  41, 174,  87, 142,  32, 149],
       [184, 118,  22, 104, 177, 113, 170, 147, 125, 113],
       [ 47, 148,  95, 102, 125, 146, 109,  82,  42, 118],
       [109,  40,  28,  52,  61,  54, 129,  92,  82,  30]])

In [18]:
arr2[0,0] #style 1 - pass the row and column indices together, separated by a comma


Out[18]:
123

In [19]:
arr2[0][0] #style 2 - index the row first, then the element within that row, as with a list of lists - not so popular


Out[19]:
123

In [20]:
arr2[1] # get a full row (the row at index 1, i.e. the second row)


Out[20]:
array([152, 105,  87, 137,  41, 174,  87, 142,  32, 149])

Array dicing


In [22]:
#get the second column (column index 1): ':' selects every row
arr2[:,1]


Out[22]:
array([ 46, 105, 118, 148,  40])

Thus, you specify : for all rows, followed by 1 for the column index. And you get a 1D array as the result.


In [24]:
#get the 3rd row (row index 2): ':' selects every column
arr2[2,:] #which is same as arr2[2]


Out[24]:
array([184, 118,  22, 104, 177, 113, 170, 147, 125, 113])

In [27]:
#get the center 3x3 block - rows 1,2,3 and columns 4,5,6 (0-based; upper bounds 4 and 7 are excluded)
arr2[1:4, 4:7]


Out[27]:
array([[ 41, 174,  87],
       [177, 113, 170],
       [125, 146, 109]])

Array broadcasting

NumPy allows assigning a single value to many elements at once (the value is broadcast across the selection), just like in MATLAB.


In [28]:
# arr2 before any bulk assignment
arr2


Out[28]:
array([[123,  46,  28,  25,  47,  46,  25, 176,  73, 174],
       [152, 105,  87, 137,  41, 174,  87, 142,  32, 149],
       [184, 118,  22, 104, 177, 113, 170, 147, 125, 113],
       [ 47, 148,  95, 102, 125, 146, 109,  82,  42, 118],
       [109,  40,  28,  52,  61,  54, 129,  92,  82,  30]])

In [31]:
# basic slicing returns a view onto arr2's data, not an independent copy
arr2_subset = arr2[1:4, 4:7]
arr2_subset


Out[31]:
array([[ 41, 174,  87],
       [177, 113, 170],
       [125, 146, 109]])

In [34]:
arr2_subset[:,:] = 999 #broadcast one scalar to every element; arr2_subset is a slice of arr2, so arr2 is modified too
arr2_subset


Out[34]:
array([[999, 999, 999],
       [999, 999, 999],
       [999, 999, 999]])

Deep copy

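Assigning a NumPy array to a new name, or slicing it, does not copy the data: assignment just binds another name to the same object, and a slice is a view onto the source array's data. Hence any modification made through the derivative affects the source. Make an independent (deep) copy using the copy() method.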


In [35]:
arr2 #notice the 999 in the middle - the write through arr2_subset changed arr2 itself


Out[35]:
array([[123,  46,  28,  25,  47,  46,  25, 176,  73, 174],
       [152, 105,  87, 137, 999, 999, 999, 142,  32, 149],
       [184, 118,  22, 104, 999, 999, 999, 147, 125, 113],
       [ 47, 148,  95, 102, 999, 999, 999,  82,  42, 118],
       [109,  40,  28,  52,  61,  54, 129,  92,  82,  30]])

In [38]:
# plain assignment only binds a second name to the same array object; nothing is copied
arr2_subset_a = arr2_subset
arr2_subset_a is arr2_subset


Out[38]:
True

Notice they are the same object in memory.


In [36]:
# copy() allocates a new array with its own data
arr3_subset = arr2_subset.copy()
arr3_subset


Out[36]:
array([[999, 999, 999],
       [999, 999, 999],
       [999, 999, 999]])

In [37]:
# identity check: the copy is a different object
arr3_subset is arr2_subset


Out[37]:
False

Notice they are different objects in memory. Thus changing arr3_subset will not affect its source


In [39]:
# write to the copy, then display the original to show it is untouched
# NOTE(review): arr3_subset presumably has an integer dtype (it derives from randint), so 0.1 would be stored as 0
arr3_subset[:,:] = 0.1
arr2_subset


Out[39]:
array([[999, 999, 999],
       [999, 999, 999],
       [999, 999, 999]])

Array searching

Use MATLAB-style boolean (mask) indexing to search arrays.


In [41]:
# current contents of arr1, for reference
arr1


Out[41]:
array([13, 10, 12, 14, 12, 23, 13, 14])

Get all numbers greater than 15


In [42]:
# boolean mask indexing: keep only the elements where the condition is True
arr1[arr1 > 15]


Out[42]:
array([23])

In [43]:
# same idea with a lower threshold
arr1[arr1 > 12]


Out[43]:
array([13, 14, 23, 13, 14])

The condition on its own returns a boolean array with the same shape as the array being queried.


In [44]:
# the comparison alone yields the boolean mask (one True/False per element)
arr1 > 12


Out[44]:
array([ True, False, False,  True, False,  True,  True,  True], dtype=bool)

In [46]:
arr2[arr2 > 50] #loses the original shape: the selected elements cannot, in general, fill a 2D grid, so the result is 1D


Out[46]:
array([123, 176,  73, 174, 152, 105,  87, 137, 999, 999, 999, 142, 149,
       184, 118, 104, 999, 999, 999, 147, 125, 113, 148,  95, 102, 999,
       999, 999,  82, 118, 109,  52,  61,  54, 129,  92,  82])

In [47]:
# elements of arr2 smaller than 30, again returned as a 1D array
arr2[arr2 < 30]


Out[47]:
array([28, 25, 25, 22, 28])

Array operations

NumPy overloads operators such as +, -, /, * and **, so you can combine two arrays element-wise as if they were scalars.


In [5]:
# element-wise addition of two arrays of the same shape
arr_sum = arr1 + arr1
arr_sum


Out[5]:
array([24, 42, 38, 28, 32, 40, 48, 42])

In [6]:
# NOTE(review): despite the name, `** 2` squares each element; it does not cube it
arr_cubed = arr1 ** 2
arr_cubed


Out[6]:
array([144, 441, 361, 196, 256, 400, 576, 441])

Similarly, you can add a scalar to an array and NumPy will broadcast that operation on all the elements.


In [7]:
# the scalar 100 is broadcast and subtracted from every element
arr_cubed - 100


Out[7]:
array([ 44, 341, 261,  96, 156, 300, 476, 341])

Caveats

NumPy does not throw errors for divide by zero or for 0/0. Instead it emits a RuntimeWarning and sets the value to inf and nan respectively.


In [9]:
# set the first element to 0 so the next cell can demonstrate 0/0
arr_cubed[0] = 0
arr_cubed


Out[9]:
array([  0, 441, 361, 196, 256, 400, 576, 441])

In [10]:
# element-wise division by zero: 0/0 gives nan, non-zero/0 gives inf; NumPy emits a RuntimeWarning rather than raising
arr_cubed / 0


C:\Users\atma6951\AppData\Local\Continuum\Anaconda3\envs\pychakras\lib\site-packages\ipykernel_launcher.py:1: RuntimeWarning: divide by zero encountered in true_divide
  """Entry point for launching an IPython kernel.
C:\Users\atma6951\AppData\Local\Continuum\Anaconda3\envs\pychakras\lib\site-packages\ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in true_divide
  """Entry point for launching an IPython kernel.
Out[10]:
array([ nan,  inf,  inf,  inf,  inf,  inf,  inf,  inf])

Thus 0/0 = nan and num/0 = inf

Universal functions

Numpy has a bunch of universal functions that work on the array elements one at a time and allow arrays to be used or treated as scalars.

Before writing a loop, look up the list of available universal functions in the NumPy documentation: https://numpy.org/doc/stable/reference/ufuncs.html