NumPy

Faster computation with NumPy ndarrays


In [1]:
import numpy as np

In [2]:
# A single argument is the exclusive stop value; counting starts at 0.
np.arange(10)


Out[2]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [3]:
# arange(start, stop, step): odd numbers from 1 up to, but not including, 10.
np.arange(1, 10, 2)


Out[3]:
array([1, 3, 5, 7, 9])

In [4]:
# A fractional step works here, but the NumPy documentation recommends
# np.linspace for non-integer steps because floating-point rounding can make
# the number of generated elements unpredictable.
np.arange(1, 10, 0.5, dtype=np.float64)


Out[4]:
array([ 1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,  5.5,  6. ,
        6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5])

In [5]:
# Sample 1-D array holding the integers 1..9; the following cells inspect it.
m_data = np.arange(1, 10)

In [6]:
# Number of dimensions (axes) of the array.
m_data.ndim


Out[6]:
1

In [7]:
# Length along each axis, returned as a tuple.
m_data.shape


Out[7]:
(9,)

In [8]:
# Element count, bytes per element, and element type.
# size * itemsize is the total length of the underlying buffer in bytes.
m_data.size, m_data.itemsize, m_data.dtype


Out[8]:
(9, 8, dtype('int64'))

In [9]:
# Raw memory backing the array (shown as a Python 2 buffer object in the
# saved output).
m_data.data


Out[9]:
<read-write buffer for 0x30b0ae0, size 72, offset 0 at 0x31214b0>

In [10]:
# Byte-by-byte view of the buffer: in the saved output every int64 element
# occupies 8 bytes with the lowest-order byte first.
list(m_data.data)


Out[10]:
['\x01',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x02',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x03',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x04',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x05',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x06',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x07',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x08',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\t',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00',
 '\x00']

Performance comparison


In [11]:
%%capture results

# Time creating 999 consecutive integers as a Python list (in Python 2,
# range returns a list) versus as a NumPy array.
%timeit python_list = range(1, 1000)
%timeit np_array = np.arange(1, 1000)

In [12]:
# Display the %timeit output captured into `results` by the previous cell.
# The parenthesised form prints the same text under Python 2 and is also
# valid Python 3 syntax.
print(results)


100000 loops, best of 3: 7.65 us per loop
1000000 loops, best of 3: 1.25 us per loop


In [13]:
%%capture results

# Same creation benchmark with ten times as many elements (9,999).
%timeit python_list = range(1, 10000)
%timeit np_array = np.arange(1, 10000)

In [14]:
# Display the %timeit output captured into `results` by the previous cell.
# The parenthesised form prints the same text under Python 2 and is also
# valid Python 3 syntax.
print(results)


10000 loops, best of 3: 87.4 us per loop
100000 loops, best of 3: 4.86 us per loop


In [15]:
%%capture results
# NOTE(review): `+` on two Python lists concatenates them into one
# 19,998-element list; it does not add element-wise. This timing is therefore
# not directly comparable with the `array1 + array2` benchmark that follows.
list1, list2 = range(1, 10000), range(1, 10000)

%timeit list1 + list2

In [16]:
# Display the %timeit output captured into `results` by the previous cell.
# The parenthesised form prints the same text under Python 2 and is also
# valid Python 3 syntax.
print(results)


10000 loops, best of 3: 83.6 us per loop


In [17]:
%%capture results

# `+` on two ndarrays adds them element-wise and returns a new
# 9,999-element array.
array1, array2 = np.arange(1, 10000), np.arange(1, 10000)

%timeit array1 + array2

In [18]:
# Display the %timeit output captured into `results` by the previous cell.
# The parenthesised form prints the same text under Python 2 and is also
# valid Python 3 syntax.
print(results)


100000 loops, best of 3: 9.95 us per loop


In [19]:
%%timeit

# Baseline: an empty Python loop over 100 integers from the built-in range.
for i in range(100):
    pass


1000000 loops, best of 3: 1.54 µs per loop

In [20]:
%%timeit

# The same empty loop over an ndarray. A Python for-loop pulls the elements
# out one at a time, so nothing here is vectorised.
for i in np.arange(100):
    pass


100000 loops, best of 3: 8.38 µs per loop

In [21]:
# Slowdown factor of the ndarray loop relative to the range loop.
# NOTE(review): the operands (8.33, 1.57) do not match the timings displayed
# by the two cells above (8.38 us, 1.54 us); presumably they were copied from
# an earlier run - confirm or recompute.
8.33/1.57


Out[21]:
5.305732484076433

In [22]:
%%timeit

# Baseline again, now with one million iterations.
for i in range(1000000):
    pass


10 loops, best of 3: 26.7 ms per loop

In [23]:
%%timeit

# One million iterations over an ndarray, element by element.
for i in np.arange(1000000):
    pass


10 loops, best of 3: 72 ms per loop

In [24]:
# Slowdown factor of the ndarray loop relative to the range loop.
# NOTE(review): the operands (71.2, 25.8) do not match the timings displayed
# by the two cells above (72 ms, 26.7 ms); presumably they were copied from
# an earlier run - confirm or recompute.
71.2/25.8


Out[24]:
2.75968992248062

Most common functions


In [25]:
# Build a 1-D array from a Python list.
np.array([1, 2, 3, 4, 5, 6])


Out[25]:
array([1, 2, 3, 4, 5, 6])

In [26]:
# Multi-dimensional array: nested lists give three rows of two columns each.

np.array([[1, 2], [3, 4], [5, 6]])


Out[26]:
array([[1, 2],
       [3, 4],
       [5, 6]])

In [27]:
# 2x4 array of zeros; the output shows floating-point values because no
# dtype was requested.
np.zeros((2, 4))


Out[27]:
array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

In [28]:
# The shape tuple may have any number of axes (here 2x3x4), and dtype selects
# the element type (64-bit integers instead of floats).
np.zeros((2, 3, 4), dtype=np.int64)


Out[28]:
array([[[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]],

       [[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]]])

In [29]:
# Generate `num` evenly spaced values between two numbers; both end points
# (1 and 5) are included by default.

np.linspace(1, 5, num=10)


Out[29]:
array([ 1.        ,  1.44444444,  1.88888889,  2.33333333,  2.77777778,
        3.22222222,  3.66666667,  4.11111111,  4.55555556,  5.        ])

In [30]:
# endpoint=False leaves out the stop value, so the spacing becomes
# (5 - 1) / 10 = 0.4 and the last sample is 4.6.
np.linspace(1, 5, num=10, endpoint=False)


Out[30]:
array([ 1. ,  1.4,  1.8,  2.2,  2.6,  3. ,  3.4,  3.8,  4.2,  4.6])

In [31]:
# Array with 10 random floats.
# The generator is not seeded here, so re-running gives different values.

np.random.random(10)


Out[31]:
array([ 0.91532837,  0.42217102,  0.71645726,  0.82367485,  0.0996911 ,
        0.97587359,  0.85240052,  0.0881831 ,  0.28142204,  0.74541054])

In [32]:
# Passing a shape tuple yields a 3x3 array of random floats.
np.random.random((3, 3))


Out[32]:
array([[ 0.38165553,  0.71101953,  0.48616148],
       [ 0.09852686,  0.12595853,  0.98626087],
       [ 0.57851689,  0.42367345,  0.55494517]])

Statistical Analysis


In [33]:
# Random 2x3 sample data set reused by the statistics, reshaping and slicing
# cells below. It is unseeded, so the values change on every run.
ds = np.random.random((2, 3))
ds


Out[33]:
array([[ 0.68863177,  0.16180561,  0.1704245 ],
       [ 0.91741515,  0.33599057,  0.98901736]])

In [34]:
# Largest value over the whole array (no axis given).
np.max(ds)


Out[34]:
0.98901735549495173

In [35]:
# Reduce along axis 0: the maximum of each column (one value per column).
np.max(ds, axis=0)


Out[35]:
array([ 0.91741515,  0.33599057,  0.98901736])

In [36]:
# Reduce along axis 1: the maximum of each row (one value per row).
np.max(ds, axis=1)


Out[36]:
array([ 0.68863177,  0.98901736])

In [37]:
# Smallest value over the whole array.
np.min(ds)


Out[37]:
0.16180560500041785

In [38]:
# Arithmetic mean of all six elements.
np.mean(ds)


Out[38]:
0.54388082334571886

In [39]:
# Median of all elements; with an even count (six here) it is the mean of the
# two middle values.
np.median(ds)


Out[39]:
0.51231116617267847

In [40]:
# Standard deviation of all elements; np.std computes the population form
# (ddof=0) unless told otherwise.
np.std(ds)


Out[40]:
0.33845155073550653

In [41]:
# Sum of all elements.
np.sum(ds)


Out[41]:
3.2632849400743131

In [42]:
# Reshaping the matrix: the 2x3 array becomes a 6x1 column holding the same
# six values in row-major order.
# print(ds) produces the same text as the Python 2 statement `print ds` and
# is also valid Python 3 syntax.

print(ds)
np.reshape(ds, (6, 1))


[[ 0.68863177  0.16180561  0.1704245 ]
 [ 0.91741515  0.33599057  0.98901736]]
Out[42]:
array([[ 0.68863177],
       [ 0.16180561],
       [ 0.1704245 ],
       [ 0.91741515],
       [ 0.33599057],
       [ 0.98901736]])

In [43]:
# Flattening: np.ravel returns the elements as a 1-D array.

np.ravel(ds)


Out[43]:
array([ 0.68863177,  0.16180561,  0.1704245 ,  0.91741515,  0.33599057,
        0.98901736])

Slicing


In [44]:
# Show the 2x3 sample array again as a reference for the slices below.
ds


Out[44]:
array([[ 0.68863177,  0.16180561,  0.1704245 ],
       [ 0.91741515,  0.33599057,  0.98901736]])

In [45]:
# A single slice applies to axis 0: rows 0 and 1, i.e. the whole array here.
ds[0:2]


Out[45]:
array([[ 0.68863177,  0.16180561,  0.1704245 ],
       [ 0.91741515,  0.33599057,  0.98901736]])

In [46]:
# Rows 0-1 of column 0; the integer index drops that axis, so the result
# is 1-D.
ds[0:2, 0]


Out[46]:
array([ 0.68863177,  0.91741515])

In [47]:
# Rows 0-1 and columns 0-1: a 2x2 sub-array.
ds[0:2, 0:2]


Out[47]:
array([[ 0.68863177,  0.16180561],
       [ 0.91741515,  0.33599057]])

In [48]:
%matplotlib inline
import cv2
import matplotlib.pyplot as plt

img = cv2.imread('lena.jpg')
b,g,r = cv2.split(img)
img2 = cv2.merge([r,g,b])
plt.imshow(img2) # expect true color


Out[48]:
<matplotlib.image.AxesImage at 0x352af10>

In [49]:
# Keep indices 0-255 along the first axis (the image rows); all columns and
# channels are retained.
img3 = img2[:256]
plt.imshow(img3)


Out[49]:
<matplotlib.image.AxesImage at 0x38aad50>

In [50]:
# All rows, but only indices 0-255 along the second axis (the image columns).
img4 = img2[:, 0:256]
plt.imshow(img4)


Out[50]:
<matplotlib.image.AxesImage at 0x3beae10>

In [51]:
# Rows 256-511 and columns 256-511.
# NOTE(review): presumably the lower-right quadrant of a 512x512 image -
# confirm the image size.
img5 = img2[256:512, 256:512]
plt.imshow(img5)


Out[51]:
<matplotlib.image.AxesImage at 0x4083e50>