In [ ]:
import numpy as np
def non_vectorized_dot_product(x, y):
    """Return the dot product of x and y, computed with an explicit loop.

    The result is the sum of x[i] * y[i] over every index i of x.  The loop
    is deliberately written in pure Python so that it can be timed against
    a vectorized NumPy expression.

    Args:
        x: Indexable sequence of numbers (list, NumPy array, ...).
        y: Indexable sequence with at least as many items as x.

    Returns:
        The accumulated sum of products; 0 when x is empty.

    Raises:
        IndexError: If y is shorter than x.

    Example:
    >>> int(non_vectorized_dot_product(np.arange(20), np.arange(20)))
    2470
    """
    result = 0
    for i, x_i in enumerate(x):
        result += x_i * y[i]
    return result
signal = np.random.random(1000)
#print(signal)
In [ ]:
%timeit non_vectorized_dot_product(signal, signal)
In [ ]:
non_vectorized_dot_product(signal, signal)
Now, using NumPy's array multiplication and sum:
In [ ]:
%timeit np.sum(signal*signal)
In [ ]:
np.sum(signal*signal)
In [ ]:
# https://softwareengineering.stackexchange.com/questions/254475/how-do-i-move-away-from-the-for-loop-school-of-thought
def cleanup(x, missing=-1, value=0):
    """Return an array equal to x, except that items equal to missing
    are replaced by value.

    This is the element-by-element (non-vectorized) implementation: it
    walks x in a Python-level loop and builds the result item by item.

    Args:
        x: One-dimensional sequence (list, NumPy array, ...) to clean.
        missing: Marker to look for; items comparing equal to it are replaced.
        value: Replacement written wherever missing is found.

    Returns:
        A new NumPy array built from the cleaned items; x is not modified.

    >>> cleanup(np.arange(-3, 3), value=10)
    ... # doctest: +NORMALIZE_WHITESPACE
    array([-3, -2, 10, 0, 1, 2])
    """
    result = []
    # Iterate over the items directly; no index bookkeeping is needed.
    for item in x:
        result.append(value if item == missing else item)
    return np.array(result)
array = np.arange(-8,8)
print(array)
print(cleanup(array, value=10, missing=0))
In [ ]:
array = np.arange(-1000,1000)
%timeit cleanup(array, value=10, missing=0)
print(array[995:1006])
print(cleanup(array, value=10, missing=0)[995:1006])
In [ ]:
# http://www.secnetix.de/olli/Python/list_comprehensions.hawk
# https://docs.python.org/3/library/functions.html#zip
value = [10]*2000
%timeit [xv if c else yv for (c,xv,yv) in zip(array == 0, value, array)]
print([xv if c else yv for (c,xv,yv) in zip(array == 0, value, array)][995:1006])
In [ ]:
# https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.where.html
%timeit np.where(array == 0, 10, array)
print(np.where(array == 0, 10, array)[995:1006])
In [ ]:
from math import sin
import numpy as np
arr = np.arange(1000000)
%timeit [sin(i)**2 for i in arr]
In [ ]:
%timeit np.sin(arr)**2
In [ ]:
a = np.random.random(500000)
print(a[0:10])
b = np.copy(a)
%timeit global a; a = 10*a
a = 10*a
print(a[0:10])
In [ ]:
a = np.copy(b)
print(a[0:10])
%timeit global a ; a *= 10
a *= 10
print(a[0:10])
In [ ]:
a = np.random.rand(100,50)
b = np.copy(a)
In [ ]:
def mult(x, val):
    """Divide every element of the 2-D array x by val, in place.

    Note that, despite its name, this function divides.  Elements are
    visited row by row: the outer loop runs over axis 0 and the inner loop
    over axis 1.

    Args:
        x: Two-dimensional NumPy array; it is modified in place.
        val: Divisor applied to every element.

    Returns:
        None.
    """
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            # A single (i, j) index avoids the temporary row view that the
            # chained form x[i][j] creates on every access.
            x[i, j] /= val
%timeit -n 1 -r 1 mult(a, 10)
In [ ]:
a = np.copy(b)
def mult2(x, val):
    """Divide every element of the 2-D array x by val, in place.

    Same operation as a row-by-row traversal, but elements are visited
    column by column: the outer loop runs over axis 1 and the inner loop
    over axis 0.  Despite its name, this function divides.

    Args:
        x: Two-dimensional NumPy array; it is modified in place.
        val: Divisor applied to every element.

    Returns:
        None.
    """
    for j in range(x.shape[1]):
        for i in range(x.shape[0]):
            # A single (i, j) index avoids the temporary row view that the
            # chained form x[i][j] creates on every access.
            x[i, j] /= val
%timeit -n 1 -r 1 mult2(a, 10)
In [ ]:
# http://www.scipy-lectures.org/advanced/optimizing/
# https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.sum.html
In [ ]:
c = np.zeros((1000, 1000), order='C')
In [ ]:
%timeit c.sum(axis=0)
c.sum(axis=0).shape
In [ ]:
%timeit c.sum(axis=1)
c.sum(axis=1).shape
When you want to speed up your code, or simply need to reuse existing C code, you can call that code from Python. There are several alternatives, for example loading a shared library (.dll/.so/.dylib) from Python.
A function to optimize:
In [ ]:
!cat sum_array_lib.py
In [ ]:
# Please, restart the kernel to ensure that the module sum_array_lib is re-loaded
!rm -f sum_array_lib.cpython*.so
import sum_array_lib
import array as arr
a = arr.array('d', [i for i in range(100000)])
#a = [1 for i in range(100000)]
%timeit sum_array_lib.sum_array(a, len(a))
sum = sum_array_lib.sum_array(a, len(a))
print(sum)
Python with C data types. Another interesting link.
In [ ]:
!cp sum_array_lib.py sum_array_lib.pyx
In [ ]:
!cat sum_array_lib.pyx
In [ ]:
!cat Cython/basic/setup.py
In [13]:
!rm -f sum_array_lib.cpython*.so
!python Cython/basic/setup.py build_ext --inplace
In [ ]:
# Please, restart the kernel to ensure that the module sum_array_lib is re-loaded
import sum_array_lib
import array as arr
a = arr.array('d', [i for i in range(100000)])
#a = [1.1 for i in range(100000)]
%timeit sum_array_lib.sum_array(a, len(a))
sum = sum_array_lib.sum_array(a, len(a))
print(sum)
In [ ]:
!cat Cython/cdef/sum_array_lib.pyx
In [17]:
!cat Cython/cdef/setup.py
In [18]:
# Please, restart the kernel to ensure that the module sum_array_lib is re-loaded
!rm sum_array_lib.cpython*.so
!python Cython/cdef/setup.py build_ext --inplace
In [1]:
# Please, restart the kernel to ensure that the module sum_array_lib is re-loaded
import array as arr
import sum_array_lib
#import numpy as np
#a = np.arange(100000)
a = arr.array('d', [i for i in range(100000)])
%timeit sum_array_lib.sum_array(a, len(a))
print(sum)
In [16]:
!cat sum_array_lib.c
In [5]:
!cat sum_array.c
In [6]:
!gcc -O3 sum_array.c -o sum_array
!./sum_array
In [7]:
!cat sum_array_module.c
In [8]:
!cat setup.py
In [2]:
!python setup.py build_ext --inplace
In [10]:
import sum_array_module
import numpy as np
a = np.arange(100000)
%timeit sum_array_module.sumArray(a)
print(sum)
However, remember: vectorize when possible!
In [11]:
%timeit np.sum(a)
print(sum)