In [138]:
%matplotlib inline
%config InlineBackend.figure_format = 'png'
matplotlib.rcParams['figure.figsize'] = (4,4)
matplotlib.rcParams['xtick.labelsize'] = 35
matplotlib.rcParams['ytick.labelsize'] = 35
matplotlib.rcParams['font.size'] = 35
matplotlib.rcParams['axes.labelsize'] = 50
matplotlib.rcParams['axes.facecolor'] = 'white'
matplotlib.rcParams['axes.edgecolor'] = 'black'
matplotlib.rcParams['axes.linewidth'] = 5
matplotlib.rcParams['lines.linewidth'] = 10.0     # line width in points
#matplotlib.rcParams['lines.color'] = 'blue'    # has no affect on plot(); see axes.prop_cycle
matplotlib.rcParams['lines.markersize'] = 8           #

1. numpy

Unarguably the most important python library for scientists. Remember this?

2. Basic module NumPy

  • Basis for scientific computing with Python
  • A powerfull N-dimension array object
  • Basic and not so basic math operations
  • Linear algebra operations
  • Normally homogenous data (i.e. numbers)
  • random numbers, FFT, sorting, …

It is so huge, we will not even try to cover it in any detail here. What follows is more of an unboxing video rather than a product review.

Sebastian made a much more comprehensive presentation. See it here: https://github.com/MPIDS/Python-Course/blob/master/03-numpy-scipy/talks/basic-numpy.ipynb


In [1]:
import numpy as np # canonicalw ay of importing it

2.1 Arrays


In [6]:
a = np.array([1,2,3,4,5,6,7,8,9,10])   #lists with special powers
# or we could just do:
b = np.arange(1,11,1)
a == b


Out[6]:
array([ True,  True,  True,  True,  True,  True,  True,  True,  True,  True], dtype=bool)

You guessed right: arithmetic operations with numpy arrays happen elementwise:


In [9]:
print(a**2)
print(a+b)


[  1   4   9  16  25  36  49  64  81 100]
[ 2  4  6  8 10 12 14 16 18 20]

2.1.1 If you need to do some operation on each element of an array, don't even think of a for loop, use numpy:


In [19]:
x = np.arange(0,1000,1)

In [20]:
%%timeit
[i/2 for i in x]


10000 loops, best of 3: 138 µs per loop

In [21]:
%%timeit
x/2


The slowest run took 5.97 times longer than the fastest. This could mean that an intermediate result is being cached 
100000 loops, best of 3: 4.83 µs per loop

Try your hand on other operators:

  • Arithmetic: >, <, /
  • Math: np.sin, np.cos, np.round,np.log, np.sqrt...

2.2 Arrays are N-Dimensional (ndarray)

2.2.1 Dimensional


In [14]:
arr = np.array(5)
# although 0-d arrays are sometimes a bit special :(
print(arr)
print('The dimension is:', arr.ndim)
print('The shape is:', arr.shape)


5
The dimension is: 0
The shape is: ()

2.2.2 Dimensional


In [22]:
arr = np.array([4, 5, 6])
print(arr)
print('The dimension is:', arr.ndim)
print('The shape is:', arr.shape)


[4 5 6]
The dimension is: 1
The shape is: (3,)

2.2.3 Dimensional


In [23]:
arr = np.array([[1, 2],
                [3, 4],
                [5, 6]])
print(arr)
print('The dimension is:', arr.ndim)
print('The shape is:', arr.shape)


[[1 2]
 [3 4]
 [5 6]]
The dimension is: 2
The shape is: (3, 2)

2.3 Array creation functions (some of them)


In [37]:
print(np.zeros((3,6)))
print(np.ones((3,6)))
print(np.eye(4))
print(np.eye(4, k = 1))
print(np.diag([1,2,3,4]))
print(np.zeros_like(np.diag([1,2,3,4])))
print(np.linspace(0,1,10))


[[ 0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.]]
[[ 1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.  1.]]
[[ 1.  0.  0.  0.]
 [ 0.  1.  0.  0.]
 [ 0.  0.  1.  0.]
 [ 0.  0.  0.  1.]]
[[ 0.  1.  0.  0.]
 [ 0.  0.  1.  0.]
 [ 0.  0.  0.  1.]
 [ 0.  0.  0.  0.]]
[[1 0 0 0]
 [0 2 0 0]
 [0 0 3 0]
 [0 0 0 4]]
[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]
[ 0.          0.11111111  0.22222222  0.33333333  0.44444444  0.55555556
  0.66666667  0.77777778  0.88888889  1.        ]

There are so many more:

  • np.arange
  • np.tri
  • np.triu
  • np.tril

Some recreational array creation examples:


In [174]:
x = 1000 # flag width
y = 1000 # flag height

bw = 100 # width of the cross and slashes

flag = np.zeros((x, y), dtype = bool) # a white canvas (white==0)

cross = np.zeros_like(flag)
cross[y//2-bw:y//2+bw,:] = 1 # The horizontal stroke 
cross[:,y//2-bw:y//2+bw] = 1 # The vertical stroke

slashes = np.ones_like(flag, dtype = bool) # a black canvas
slash_left = np.logical_not(np.triu(slashes, k = bw) | np.tril(slashes, k = -bw))
slash_right = slash_left[::-1, :] # right slash by inverting the x axis

squarejack = (cross | slash_left | slash_right) # numpy logical or to combine all

In [158]:
def show(arr2D):
    """shows a 2D numpy array"""
    fig = plt.figure()
    ax = fig.add_subplot(1,1,1)
    h,w = arr2D.shape
    ax.set_aspect(w/h)
    ax.grid('off')
    ax.axis('off')
    ax.imshow(arr2D, interpolation='nearest')

In [175]:
show(squarejack)



In [176]:
board = np.ones((16,16))
board[::2,1::2] = 0
board[1::2,0::2] = 0

show(board)


2.4 random arrays:

  • np.random.rand(d0,d1,..)
  • np.random.randn(d0,d1,..)
  • np.random.uniform(low, high, size = (d0,d1,..))

2.5 Array reductions:

2.5.1 Statistical:


In [55]:
normdistarr = np.random.normal(loc = 2, scale = 0.3, size = 10000)
print(normdistarr.mean())
print(normdistarr.var())
print(normdistarr.ptp())
print(normdistarr.max())
print(normdistarr.min())


1.9987368244
0.0906360306437
2.41636908333
3.39745645769
0.981087374355

They can also be done on only one axis


In [56]:
normdist2D = np.random.normal(loc = 2, scale = 0.3, size = (10, 1000))
print(normdist2D.mean(axis = 1))
print(normdist2D.var(axis = 1))
print(normdist2D.ptp(axis = 1))
print(normdist2D.max(axis = 1))
print(normdist2D.min(axis = 1))


[ 2.00119499  2.00550948  1.99725594  2.00042294  1.99906724  2.00826339
  1.9908694   1.988672    1.9921972   2.00797976]
[ 0.09077598  0.09496839  0.09694581  0.0878613   0.08870739  0.08907957
  0.08480158  0.08979436  0.09010844  0.09806751]
[ 1.90408765  2.01038893  1.90427005  1.8945796   2.11008746  1.91691479
  1.74470253  1.84887678  1.90844309  2.03068144]
[ 2.9504641   2.94578872  2.98949858  2.95127403  2.96524016  2.942005
  2.84213345  2.87408944  2.91670318  2.94686157]
[ 1.04637645  0.93539979  1.08522853  1.05669443  0.8551527   1.02509021
  1.09743092  1.02521266  1.00826009  0.91618013]

2.5.2 Ones that help you find stuff:


In [57]:
print(normdistarr.argmax())
print(normdistarr.argmin())


7946
264

2.6 Broadcasting: Implicit repetition of arrays.


In [61]:
arr2d = np.array([[1,2,3],
                  [4,5,6]])
print(arr2d+3)
print(arr2d+[7,5,3])


[[4 5 6]
 [7 8 9]]
[[ 8  7  6]
 [11 10  9]]

2.7 Container manipulation:

2.7.1 Reshaping:


In [169]:
x = np.arange(12)
print(x)
print(x.reshape(3,4))
print(x.reshape(3,2,2))


[ 0  1  2  3  4  5  6  7  8  9 10 11]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[[ 0  1]
  [ 2  3]]

 [[ 4  5]
  [ 6  7]]

 [[ 8  9]
  [10 11]]]

Maybe you want to let numpy infer the size along last dimention, because you are lazy like that:


In [170]:
print(x.reshape(3,-1))


[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

In terms of our venerable SquareJack:


In [177]:
show(squarejack.reshape(500,-1))



In [178]:
show(squarejack)


2.7.2 More such stuff:

  • np.arange
  • np.concatenate
  • np.tile

In [179]:
show(np.tile(squarejack,4))



In [180]:
show(np.concatenate((squarejack, 1-squarejack)))


2.8 Indexing (crucial)

2.8.1 Picking an element:

Picking a single element from an array requires an integer along each dimension


In [77]:
arr = np.arange(10).reshape(2, 5)
print(arr)


[[0 1 2 3 4]
 [5 6 7 8 9]]

In [78]:
print(arr[0, 3])
print(arr[1, 2])


3
7

Somewhat confusing way of choosing more than one element:


In [82]:
print(arr[(0,1), (3,2)]) # arr[(x1, x2, x3,..), (y1, y2, y3, ...)]


[3 7]

2.8.2 Slicing:

  • [start:stop:step]
  • [start:stop] ==[start:stop:1]
  • [start:] == [start:END:1]
  • [::step] == [BEGIN:END:step]

In [92]:
arr = np.arange(12).reshape(3, 4)
print(arr)


[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

In [75]:
arr[0:2,1:4]


Out[75]:
array([[1, 2, 3],
       [5, 6, 7]])

In [83]:
arr[:, 2:4]


Out[83]:
array([[2, 3],
       [7, 8]])

one can reverse arrays with negative step


In [93]:
arr[::-1, ::-1]


Out[93]:
array([[11, 10,  9,  8],
       [ 7,  6,  5,  4],
       [ 3,  2,  1,  0]])

Messing more with SquareJack:


In [183]:
show(squarejack[:500,:500])  # Top left corner



In [187]:
show(squarejack[500:0:-1,0:500]) # Top left corner, flipped lengthways



In [188]:
show(squarejack[500:0:-1,500:0:-1]) # Top left corner, flipped both ways


2.8.3 Boolean indexing


In [244]:
arr = np.arange(12).reshape(2,-1)

In [245]:
arr >7


Out[245]:
array([[False, False, False, False, False, False],
       [False, False,  True,  True,  True,  True]], dtype=bool)

In [246]:
arr[arr > 7]


Out[246]:
array([ 8,  9, 10, 11])

2.8.3.1 More powerful: np.where: filters an array, puts default values


In [247]:
arr[np.where(arr%2==0)] = 0

In [248]:
np.where(arr%2==0, 0, arr)


Out[248]:
array([[ 0,  1,  0,  3,  0,  5],
       [ 0,  7,  0,  9,  0, 11]])

SquareJack gets a taste of grey:


In [249]:
greyjack = squarejack.astype(float)

greyjack[np.where(greyjack[:, :500] < 0.1)] = 0.5
show(greyjack)


2.9 Stuff I still haven't maged to fit in:

matrix multiplication:


In [96]:
x = np.array([[1,2],
             [3,4]])
y = np.array([[5],
             [6]])
np.dot(x, y)              # The matrix product a*v


Out[96]:
array([[17],
       [39]])

3. matplotlib

A plotting library:


In [97]:
import matplotlib.pyplot as plt
%matplotlib inline

In [98]:
x=np.arange(0,5)
x_sq=x**2
plt.plot(x,x_sq)


Out[98]:
[<matplotlib.lines.Line2D at 0x7f53bf075e48>]

Histograms


In [99]:
large_array=np.random.randn(1000)

In [100]:
plt.hist(large_array)


Out[100]:
(array([   8.,   50.,  109.,  200.,  267.,  198.,  114.,   41.,   12.,    1.]),
 array([-2.95106208, -2.28926877, -1.62747547, -0.96568217, -0.30388886,
         0.35790444,  1.01969774,  1.68149104,  2.34328435,  3.00507765,
         3.66687095]),
 <a list of 10 Patch objects>)

Scipy

Scipy is a collection of many packages such as:

  • integrate: Integration and ordinary differential equation solvers
  • interpolate: Interpolation and smoothing splines
  • linalg: Linear algebra
  • ndimage: N-dimensional image processing
  • optimize: Optimization and root-finding routines
  • signal: Signal processing
  • spatial: Spatial data structures and algorithms
  • stats: Statistical distributions and functions

In [105]:
npts = 1000
error_scale = 0.01
x = np.random.uniform(0,1,npts)
y = 1.5*x + 3 + np.random.normal(scale = error_scale, size = npts)
plt.plot(x, y, 'o')


Out[105]:
[<matplotlib.lines.Line2D at 0x7f53bedb6400>]

In [108]:
from scipy.stats import linregress

In [112]:
slope, intercept, *_ = linregress(x, y)

In [115]:
plt.plot(x, y, 'o', label = "datapoints")
plt.plot(x, slope*x+intercept, '-', label = "linear fit")
plt.legend()
plt.xlabel("x")
plt.ylabel("y")


Out[115]:
<matplotlib.text.Text at 0x7f53bd4c7438>