# Hello World in Python
print("hello world")
/* Hello World in Java */
/**
 * Minimal Java program: prints a greeting to standard output.
 */
public class HelloWorld {
    /**
     * Program entry point.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println("Hello, World");
    }
}
In [ ]:
%matplotlib inline
In [ ]:
import seaborn as sns
data = sns.load_dataset("iris")
sns.pairplot(data, hue="species");
In [ ]:
# A silly function implemented in Python
def func_python(N):
    """Return the sum of ``(i % 3 - 1) * i`` for ``i`` in ``range(N)``.

    The explicit Python-level loop is deliberate: this function is the
    pure-Python baseline that gets timed against the compiled version.

    Parameters
    ----------
    N : int
        Number of terms to accumulate; ``N <= 0`` yields ``0.0``.

    Returns
    -------
    float
        The accumulated sum (always a float because it starts at ``0.0``).
    """
    d = 0.0
    for i in range(N):
        # (i % 3 - 1) cycles through -1, 0, 1, so terms alternate in sign
        d += (i % 3 - 1) * i
    return d
In [ ]:
# Use IPython timeit magic to time the execution
%timeit func_python(10000)
In [ ]:
%load_ext fortranmagic
In [ ]:
%%fortran
! Accumulate (mod(i, 3) - 1) * i for i = 0 .. n-1 and return the sum in d.
subroutine func_fort(n, d)
    implicit none
    integer, intent(in) :: n            ! number of terms to sum
    double precision, intent(out) :: d  ! resulting sum
    integer :: k

    d = 0.0d0
    do k = 0, n - 1
        ! the product is computed in integer arithmetic, then widened
        d = d + dble((mod(k, 3) - 1) * k)
    end do
end subroutine func_fort
In [ ]:
%%file func_fortran.f
! Fixed-form source: a .f file is compiled as fixed-form Fortran, so
! every statement must start in column 7 or later (six leading spaces).
! Sums (mod(i, 3) - 1) * i for i = 0 .. n-1 and returns it in d.
      subroutine func_fort(n, d)
      integer, intent(in) :: n
      double precision, intent(out) :: d
      integer :: i
      d = 0
      do i = 0, n - 1
         d = d + (mod(i, 3) - 1) * i
      end do
      end subroutine func_fort
In [ ]:
# use f2py rather than f2py3 for Python 2
!f2py3 -c func_fortran.f -m func_fortran > /dev/null
In [ ]:
from func_fortran import func_fort
In [ ]:
%timeit func_fort(10000)
Fortran is about 100 times faster for this task!
We alluded to this yesterday, but languages tend to trade off convenience against performance.
C, Fortran, etc.: static typing and compiled code lead to fast execution
Python, R, Matlab, IDL, etc.: dynamic typing and interpreted execution lead to fast development
We like Python because our development time is generally more valuable than execution time. But sometimes speed can be an issue.
Use Numpy ufuncs to your advantage
Use Numpy aggregates to your advantage
Use Numpy broadcasting to your advantage
Use Numpy slicing and masking to your advantage
Use a tool like SWIG, cython or f2py to interface to compiled code.
Here we'll cover the first four, and leave the fifth strategy for a later session.
In [ ]:
a = [1, 3, 2, 4, 3, 1, 4, 2]
b = [val + 5 for val in a]
print(b)
In [ ]:
import numpy as np
a = np.array(a)
In [ ]:
b = a + 5 # element-wise
print(b)
In [ ]:
a = list(range(100000))
%timeit [val + 5 for val in a]
In [ ]:
a = np.array(a)
%timeit a + 5
In [ ]:
# Build a plain Python list of 100000 random floats, one random() call each
from random import random
c = [random() for _ in range(100000)]
In [ ]:
%timeit min(c)
In [ ]:
c = np.array(c)
In [ ]:
%timeit c.min()
In [ ]:
M = np.random.randint(0, 10, (3, 5))
M
In [ ]:
M.sum()
In [ ]:
M.sum(axis=0)
In [ ]:
M.sum(axis=1)
In [ ]:
np.arange(3) + 5
In [ ]:
np.ones((3, 3)) + np.arange(3)
In [ ]:
np.arange(3).reshape((3, 1)) + np.arange(3)
In [ ]:
M = np.ones((2, 3))
M
In [ ]:
a = np.arange(3)
a
In [ ]:
M + a
In [ ]:
a = np.arange(3).reshape((3, 1))
a
In [ ]:
b = np.arange(3)
b
In [ ]:
a + b
In [ ]:
M = np.ones((3, 2))
M
In [ ]:
a = np.arange(3)
a
In [ ]:
# M is (3, 2) and a is (3,): the trailing dimensions (2 vs 3) differ and
# neither is 1, so the shapes cannot be broadcast and this raises ValueError.
M + a
Python lists can be indexed with integers or slices:
In [ ]:
L = [2, 3, 5, 7, 11]
In [ ]:
L[0] # integer index
In [ ]:
L[1:3] # slice for multiple elements
In [ ]:
L = np.array(L)
L
In [ ]:
L[0]
In [ ]:
L[1:3]
In [ ]:
L
In [ ]:
mask = np.array([False, True, True,
False, True])
L[mask]
In [ ]:
mask = (L < 4) | (L > 8) # "|" = "bitwise OR"
L[mask]
In [ ]:
L
In [ ]:
ind = [0, 4, 2]
L[ind]
In [ ]:
M = np.arange(6).reshape(2, 3)
M
In [ ]:
# multiple indices separated by comma
M[0, 1]
In [ ]:
# mixing slices and indices
M[:, 1]
In [ ]:
# masking the full array
M[abs(M - 3) < 2]
In [ ]:
# mixing fancy indexing and slicing
M[[1, 0], :2]
In [ ]:
# mixing masking and slicing
# the boolean row mask keeps rows whose sum exceeds 4; `1:` then drops
# the first column of those rows
M[M.sum(axis=1) > 4, 1:]
In [ ]:
In [ ]:
# 1000 points in 3 dimensions
X = np.random.random((1000, 3))
X.shape
In [ ]:
# Broadcasting to find pairwise differences:
# (n, 1, 3) - (n, 3) -> (n, n, 3), with diff[i, j] = X[i] - X[j].
# np.newaxis inserts the length-1 axis without hard-coding the point count.
diff = X[:, np.newaxis, :] - X
diff.shape
In [ ]:
# Aggregate to find pairwise distances
# NOTE: summing the squared differences over the last axis (axis 2) gives
# *squared* Euclidean distances; no square root is taken here.
D = (diff ** 2).sum(2)
D.shape
In [ ]:
# set diagonal to infinity to skip self-neighbors
# (index range is taken from D itself rather than a hard-coded point count)
i = np.arange(D.shape[0])
D[i, i] = np.inf
In [ ]:
# print the indices of the nearest neighbor
# (row-wise argmin: for each row of D, the column holding its smallest entry)
i = D.argmin(axis=1)
print(i[:10])
In [ ]:
# double-check with scikit-learn
from sklearn.neighbors import NearestNeighbors
# request 2 neighbors per point and print column 1 of the index array;
# presumably column 0 is each point itself, since X is queried against
# the data it was fit on -- confirm against the scikit-learn docs
d, i = (
    NearestNeighbors()
    .fit(X)
    .kneighbors(X, n_neighbors=2)
)
print(i[:10, 1])
It's all about moving loops into compiled code:
Use Numpy ufuncs to your advantage (eliminate loops!)
Use Numpy aggregates to your advantage (eliminate loops!)
Use Numpy broadcasting to your advantage (eliminate loops!)
Use Numpy slicing and masking to your advantage (eliminate loops!)
Use a tool like SWIG, cython or f2py to interface to compiled code.
In [ ]: