In [1]:
import numpy
print("numpy version: ",numpy.__version__)
!python --version


numpy version:  1.11.3
Python 3.6.0 :: Anaconda custom (x86_64)

Python lists


In [2]:
# python list holds many python objects
L = list(range(10))
L, type(L), type(L[0])


Out[2]:
([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], list, int)

In [3]:
# the same values converted to strings: still a list, now holding str objects
L2 = [str(number) for number in L]
L2, type(L2), type(L2[0])


Out[3]:
(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], list, str)

In [4]:
# because of Python's dynamic typing, a single list can hold items of different types
L3 = [True, "2", 3.0, 4]
[type(item) for item in L3]


Out[4]:
[bool, str, float, int]

In [5]:
# Python's built-in array module: a sequence whose items all share a single
# type code ('i' in the call below)
import array
L  = list(range(10))
A = array.array('i', L)
A


Out[5]:
array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

Creating Numpy arrays

1.np.array to create numpy array from python list


In [6]:
import numpy as np
Pylist = [1, 2, 3]

In [7]:
NumpyList = np.array(Pylist)

In [8]:
NumpyList


Out[8]:
array([1, 2, 3])

In [9]:
type(Pylist) , type(NumpyList)


Out[9]:
(list, numpy.ndarray)

In [10]:
# explicitly set the data type of the resulting array
np.array([1,2,3,4], dtype=float)


Out[10]:
array([ 1.,  2.,  3.,  4.])

In [11]:
# multidimensional numpy array
np.array([range(i, i+3) for i in [2,4,6]])


Out[11]:
array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

2.from scratch


In [12]:
# create zeros
np.zeros(3, dtype=int)


Out[12]:
array([0, 0, 0])

In [13]:
# create 3x5 array filled with 1s
np.ones((3,5), dtype=float)


Out[13]:
array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

In [14]:
# create an array of 5s 
np.full((2,4), 5, dtype=float)


Out[14]:
array([[ 5.,  5.,  5.,  5.],
       [ 5.,  5.,  5.,  5.]])

In [15]:
# linear sequence starting at 4 in steps of 2; the stop value 10 is excluded
np.arange(4, 10, 2)


Out[15]:
array([4, 6, 8])

In [16]:
# array of evenly spaced values
np.linspace(0, 1, 5)


Out[16]:
array([ 0.  ,  0.25,  0.5 ,  0.75,  1.  ])

In [17]:
# uniformly distributed random values between 0 and 1
np.random.random((3,4))


Out[17]:
array([[ 0.78772728,  0.43475258,  0.80173387,  0.36467609],
       [ 0.90600859,  0.69279576,  0.63677792,  0.58005333],
       [ 0.29011891,  0.63721852,  0.00373535,  0.36661782]])

In [18]:
# normally distributed values mean=0, std= 1
np.random.normal(0,1, (2,3))


Out[18]:
array([[-2.06895387,  1.33122329,  0.40084701],
       [-0.45159799, -1.480771  ,  0.11710757]])

In [19]:
# random integer, interval [0, 10)
np.random.randint(0, 10, (2,5))


Out[19]:
array([[1, 2, 3, 6, 2],
       [4, 2, 2, 3, 5]])

In [20]:
# identity matrix
np.eye(3)


Out[20]:
array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [21]:
np.random.random_sample((3,4))


Out[21]:
array([[ 0.41257038,  0.70840527,  0.55898304,  0.96145733],
       [ 0.79404544,  0.83858273,  0.9691193 ,  0.23643289],
       [ 0.76567086,  0.8344795 ,  0.54639659,  0.37024142]])

In [22]:
# create an uninitialized array of three values (floats here, since no dtype is given);
# the contents are whatever already happens to exist at that memory location
np.empty(3)


Out[22]:
array([ 1.,  1.,  1.])

Basics of Numpy Arrays

a. Attributes of arrays


In [23]:
import numpy as np
np.random.seed(0) #to generate the same random array each time this code run
x1 = np.random.randint(10, size=6) #one dimensional array
x2 = np.random.randint(10, size=(3,4)) #two dimensional array
x3 = np.random.randint(10, size=(3,4,5)) #three dimensional array

In [24]:
x1


Out[24]:
array([5, 0, 3, 3, 7, 9])

In [25]:
x2


Out[25]:
array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [26]:
x3


Out[26]:
array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])

In [27]:
# summary of the attributes of the three-dimensional array x3
print("x3 ndim:", x3.ndim)
print("x3 shape:", x3.shape)
print("x3 size:", x3.size)
# .data is the underlying buffer object, so this prints its repr, not the values
print("x3 data:", x3.data)
print("x3 dtype:", x3.dtype)
# NOTE(review): .item is a method and is not called here, so this prints the bound method itself
print("x3 item:", x3.item)
print("x3 itemsize:", x3.itemsize, "bytes")
print("x3 nbytes:", x3.nbytes, "bytes")


x3 ndim: 3
x3 shape: (3, 4, 5)
x3 size: 60
x3 data: <memory at 0x10db8bb88>
x3 dtype: int64
x3 item: <built-in method item of numpy.ndarray object at 0x112f9f260>
x3 itemsize: 8 bytes
x3 nbytes: 480 bytes

b. Array indexing


In [28]:
x1


Out[28]:
array([5, 0, 3, 3, 7, 9])

In [29]:
x1[4]


Out[29]:
7

In [30]:
x1[-1]


Out[30]:
9

In [31]:
x2


Out[31]:
array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [32]:
x2[0, 0]


Out[32]:
3

In [33]:
# colon is for slicing. see the difference above and below.
x2[0:1]


Out[33]:
array([[3, 5, 2, 4]])

In [34]:
x2[1,1]


Out[34]:
6

In [35]:
x2[2,-3]


Out[35]:
6

In [36]:
# change the values in the array
x2[2,0] = 7
x2[2,1] = 7

In [37]:
x2


Out[37]:
array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [7, 7, 7, 7]])

c. Array slicing: x[start : stop : step] (defaults: start=0, stop=size of the dimension, step=1)


In [38]:
x = np.arange(10)
x


Out[38]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [39]:
x[:5] #first 5 elements


Out[39]:
array([0, 1, 2, 3, 4])

In [40]:
x[::2] #every other element


Out[40]:
array([0, 2, 4, 6, 8])

In [41]:
x[1::2] #every other element starting from 1


Out[41]:
array([1, 3, 5, 7, 9])

In [42]:
np.arange(1,10, 2) #example of arange method


Out[42]:
array([1, 3, 5, 7, 9])

In [43]:
x[::-1] #all elements, reversed


Out[43]:
array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [44]:
# Multidimensional subarrays
x2


Out[44]:
array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [7, 7, 7, 7]])

In [45]:
x2[0] #first row


Out[45]:
array([3, 5, 2, 4])

In [46]:
x2[2] #third row


Out[46]:
array([7, 7, 7, 7])

In [47]:
x2[2:] == x2[2]


Out[47]:
array([[ True,  True,  True,  True]], dtype=bool)

In [48]:
x2[1:2, 1:3] #second row intersection between second and third column


Out[48]:
array([[6, 8]])

In [49]:
x2[::-1, ::-1] #can be reversed


Out[49]:
array([[7, 7, 7, 7],
       [8, 8, 6, 7],
       [4, 2, 5, 3]])

In [50]:
x2[:,0] #first column


Out[50]:
array([3, 7, 7])

In [51]:
# slicing returns a view, not a copy: x2_sub shares its data with x2
x2_sub = x2[:2 , :2]
x2_sub


Out[51]:
array([[3, 5],
       [7, 6]])

In [52]:
x2_sub[0,0] = 61
x2_sub


Out[52]:
array([[61,  5],
       [ 7,  6]])

In [53]:
# modifying subarray affects the original array as well
x2


Out[53]:
array([[61,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 7,  7,  7,  7]])

In [54]:
#use copy method to keep the original array not updated
x2_sub_copy = x2[:2, :2].copy()
x2_sub_copy


Out[54]:
array([[61,  5],
       [ 7,  6]])

In [55]:
x2_sub_copy[0,0] = 99
x2_sub_copy


Out[55]:
array([[99,  5],
       [ 7,  6]])

In [56]:
x2


Out[56]:
array([[61,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 7,  7,  7,  7]])

d. Reshaping of arrays


In [57]:
# use reshape method 
grid = np.arange(1,10).reshape((3,3))
grid


Out[57]:
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [58]:
# convert one dimensional array into two dimensional row or column matrix
y = np.array([1,2,3])
y


Out[58]:
array([1, 2, 3])

In [59]:
# row vector via reshape
y.reshape((1,3))


Out[59]:
array([[1, 2, 3]])

In [60]:
#row vector via newaxis  
y[np.newaxis, :]


Out[60]:
array([[1, 2, 3]])

In [61]:
# column vector via reshape
y.reshape((3,1))


Out[61]:
array([[1],
       [2],
       [3]])

In [62]:
#column vector via newaxis 
y[:, np.newaxis]


Out[62]:
array([[1],
       [2],
       [3]])

e. Array concatenation and splitting


In [63]:
x = np.array([1,2,3])
y = np.array([1,2,3])
z = np.array([61,61,61])

In [64]:
# use np.concatenate to join the arrays end to end
np.concatenate([x,y,z])


Out[64]:
array([ 1,  2,  3,  1,  2,  3, 61, 61, 61])

In [65]:
# concat two dimensional arrays
grid


Out[65]:
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [66]:
np.concatenate([grid, grid])


Out[66]:
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9],
       [1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [67]:
np.concatenate([grid, grid], axis = 1)


Out[67]:
array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6],
       [7, 8, 9, 7, 8, 9]])

In [68]:
# vertical stack with vstack
np.vstack([x, grid])


Out[68]:
array([[1, 2, 3],
       [1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [69]:
k = np.array([[99], [99], [99]])
k


Out[69]:
array([[99],
       [99],
       [99]])

In [70]:
# horizontal stack with hstack
np.hstack([k, grid])


Out[70]:
array([[99,  1,  2,  3],
       [99,  4,  5,  6],
       [99,  7,  8,  9]])

In [71]:
# splitting of arrays
sp = np.arange(10)
sp


Out[71]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [72]:
x1, x2, x3 = np.split(sp, [1, 3])
x1 , x2, x3


Out[72]:
(array([0]), array([1, 2]), array([3, 4, 5, 6, 7, 8, 9]))

In [73]:
four = np.arange(16).reshape((4,4))
four


Out[73]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [74]:
f1, f2 = np.vsplit(four,[2] )
f1, f2


Out[74]:
(array([[0, 1, 2, 3],
        [4, 5, 6, 7]]), array([[ 8,  9, 10, 11],
        [12, 13, 14, 15]]))

Introducing Ufuncs


In [75]:
import numpy as np
np.random.seed(0)

def compute_reciprocal(values):
    """Return the reciprocal of each element of ``values``, one element at a time.

    The work is done in an explicit Python loop, one division per iteration.

    Parameters
    ----------
    values : sequence of numbers
        Must support ``len()`` and integer indexing.

    Returns
    -------
    numpy.ndarray
        Float array with ``len(values)`` entries; entry ``i`` is ``1.0 / values[i]``.
    """
    n_items = len(values)
    reciprocals = np.empty(n_items)
    for idx in range(n_items):
        reciprocals[idx] = 1.0 / values[idx]
    return reciprocals

In [76]:
values = np.random.randint(1,10, size=5)
compute_reciprocal(values)


Out[76]:
array([ 0.16666667,  1.        ,  0.25      ,  0.25      ,  0.125     ])

In [77]:
big_array = np.random.randint(1, 100 , size=1000000)
%timeit compute_reciprocal(big_array)


2.22 s ± 31.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

In [78]:
print(compute_reciprocal(values))
print(1.0 / values)


[ 0.16666667  1.          0.25        0.25        0.125     ]
[ 0.16666667  1.          0.25        0.25        0.125     ]

In [79]:
# NOTE(review): this times the 5-element `values`, not the 1,000,000-element
# `big_array` that the loop version was timed on, so the two timings are not
# directly comparable
%timeit (1.0 / values)


1.78 µs ± 22.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

In [80]:
# array arithmetic: each operator is applied element by element
x = np.arange(7)
print("x     = ", x)
print("x + 2 = ", x +2)
print("x * 2 = ", x * 2)
print("x / 2 = ", x / 2)
print("x //2 =", x // 2) # floor division


x     =  [0 1 2 3 4 5 6]
x + 2 =  [2 3 4 5 6 7 8]
x * 2 =  [ 0  2  4  6  8 10 12]
x / 2 =  [ 0.   0.5  1.   1.5  2.   2.5  3. ]
x //2 = [0 0 1 1 2 2 3]

In [81]:
# more operations
((x ** 2) + 2 ) * (-x)


Out[81]:
array([   0,   -3,  -12,  -33,  -72, -135, -228])

In [82]:
# arithmetic operations implemented in numpy
np.multiply(np.add( np.power(x, 2), 2 ), np.negative(x))


Out[82]:
array([   0,   -3,  -12,  -33,  -72, -135, -228])

In [83]:
# absolute value
z = np.array([-5, -2, 0, 1])
abs(z)


Out[83]:
array([5, 2, 0, 1])

In [84]:
np.absolute(z)


Out[84]:
array([5, 2, 0, 1])

In [85]:
np.abs(z)


Out[85]:
array([5, 2, 0, 1])

In [86]:
np.absolute(z) == np.abs(z)


Out[86]:
array([ True,  True,  True,  True], dtype=bool)

In [87]:
# trigonometric functions
theta = np.linspace(0, np.pi, 3)
theta


Out[87]:
array([ 0.        ,  1.57079633,  3.14159265])

In [88]:
print(np.sin(theta))
print(np.tan(theta))


[  0.00000000e+00   1.00000000e+00   1.22464680e-16]
[  0.00000000e+00   1.63312394e+16  -1.22464680e-16]

In [89]:
print(x)
print(np.sin(x))
print(np.cos(x))


[0 1 2 3 4 5 6]
[ 0.          0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427
 -0.2794155 ]
[ 1.          0.54030231 -0.41614684 -0.9899925  -0.65364362  0.28366219
  0.96017029]

In [90]:
# Exponents and logarithms
print(x)
print(np.power(x, 2))
print(np.power(x, 3))


[0 1 2 3 4 5 6]
[ 0  1  4  9 16 25 36]
[  0   1   8  27  64 125 216]

In [91]:
from scipy import special
#gamma functions
x = [1, 5, 10]
print("gamma(x) = ", special.gamma(x))
print("ln|gamma(x) =", special.gammaln(x))


gamma(x) =  [  1.00000000e+00   2.40000000e+01   3.62880000e+05]
ln|gamma(x) = [  0.           3.17805383  12.80182748]

Advanced Ufunc features


In [92]:
x = np.arange(4)
np.multiply(x, 2, out=x)
x


Out[92]:
array([0, 2, 4, 6])

In [93]:
# x and y must have the same number of elements; the result is written into y
x = np.arange(5)
y = np.empty(5)
np.multiply(x, 2, out=y)
y


Out[93]:
array([ 0.,  2.,  4.,  6.,  8.])

In [94]:
k = np.zeros(10)
np.power(2, x, out=k[::2])
k


Out[94]:
array([  1.,   0.,   2.,   0.,   4.,   0.,   8.,   0.,  16.,   0.])

In [95]:
# aggregate
x = np.arange(1, 6)
print(np.add.reduce(x))
print(np.multiply.reduce(x))


15
120

In [96]:
np.add.accumulate(x)


Out[96]:
array([ 1,  3,  6, 10, 15])

Aggregations: Min. Max. Std. Median. Mean


In [97]:
x = np.random.randint(1, 1000, size=10000000)

In [98]:
# summing all values in an array
%timeit sum(x)   #python code
%timeit np.sum(x)  #numpy code


938 ms ± 61 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
6.35 ms ± 95.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

In [99]:
# min and max
print(min(x), max(x) ) # python code
print(np.min(x) , np.max(x))


1 999
1 999

In [100]:
# again numpy operates much more quickly
%timeit (min(x), max(x))
%timeit (np.min(x) , np.max(x))


1.28 s ± 22.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
15.4 ms ± 207 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

In [101]:
# Multidimensional aggregation
M = np.random.randint(1,5, size=(3,4))
M


Out[101]:
array([[4, 2, 4, 2],
       [4, 1, 4, 4],
       [1, 4, 3, 3]])

In [102]:
# sum of all values
M.sum()


Out[102]:
36

In [103]:
# min of each column
np.min(M, axis=0)


Out[103]:
array([1, 1, 3, 2])

In [104]:
# max of each row (axis=1 collapses the columns, giving one value per row)
np.max(M, axis=1)


Out[104]:
array([4, 4, 4])

In [105]:
# sum of each row
np.sum(M, axis=1)


Out[105]:
array([12, 13, 11])

Other aggregation functions


In [106]:
n = [1,3,4]

In [107]:
#product of elements
np.prod(n)


Out[107]:
12

In [108]:
np.std(n)


Out[108]:
1.247219128924647

In [109]:
np.min(n), np.max(n)


Out[109]:
(1, 4)

In [110]:
# index of min and max
np.argmin(n), np.argmax(n)


Out[110]:
(0, 2)

In [111]:
np.median(n), np.mean(n)


Out[111]:
(3.0, 2.6666666666666665)

Example


In [112]:
!head -4 data/president_heights.csv


order,name,height(cm)
1,George Washington,189
2,John Adams,170
3,Thomas Jefferson,189

In [113]:
import pandas as pd
data = pd.read_csv('data/president_heights.csv', index_col='order')
data.head()


Out[113]:
name height(cm)
order
1 George Washington 189
2 John Adams 170
3 Thomas Jefferson 189
4 James Madison 163
5 James Monroe 183

In [114]:
data.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 42 entries, 1 to 44
Data columns (total 2 columns):
name          42 non-null object
height(cm)    42 non-null int64
dtypes: int64(1), object(1)
memory usage: 1008.0+ bytes

In [115]:
data.describe()


Out[115]:
height(cm)
count 42.000000
mean 179.738095
std 7.015869
min 163.000000
25% 174.250000
50% 182.000000
75% 183.000000
max 193.000000

In [116]:
heights = np.array(data['height(cm)'])
len(heights)


Out[116]:
42

In [117]:
print("Mean height:", np.mean(heights))
print("Std of heigth:",np.std(heights, ddof=1))
print('min of height:', np.min(heights))
print('max of heights:', np.max(heights))
print('median of heights:', np.median(heights))


Mean height: 179.738095238
Std of heigth: 7.01586885536
min of height: 163
max of heights: 193
median of heights: 182.0

In [118]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

In [119]:
plt.figure(figsize=(10,5) )
plt.hist(heights)
plt.title('Height distribution of US Presidents')
plt.xlabel('height (cm)')
plt.ylabel('number')
plt.show()


Broadcasting


In [120]:
a = np.array([0, 1, 2])
b = np.array([5, 5, 5])
a + b


Out[120]:
array([5, 6, 7])

In [121]:
a + 5


Out[121]:
array([5, 6, 7])

In [122]:
M = np.ones((3, 3))
M


Out[122]:
array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [123]:
M + a


Out[123]:
array([[ 1.,  2.,  3.],
       [ 1.,  2.,  3.],
       [ 1.,  2.,  3.]])

In [124]:
x = np.arange(3)
y = np.arange(3)[:,np.newaxis]

In [125]:
print(x)
print(y)


[0 1 2]
[[0]
 [1]
 [2]]

In [126]:
x + y


Out[126]:
array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [127]:
#Broadcasting example 1
M = np.ones((2,3))
a = np.arange(3)

print("Shape of M:", M.shape)
print(M)
print(' ')
print("shape of a:",a.shape) 
print(a)
print('')
print("shape of M + a:", (M+a).shape) 
print(M + a)


Shape of M: (2, 3)
[[ 1.  1.  1.]
 [ 1.  1.  1.]]
 
shape of a: (3,)
[0 1 2]

shape of M + a: (2, 3)
[[ 1.  2.  3.]
 [ 1.  2.  3.]]

In [128]:
# Broadcasting example 2
a = np.arange(3).reshape((3,1))
print(a)
print('Shape of a:', a.shape)
print(' ')
b= np.arange(3)
print(b)
print('Shape of b:',b.shape)
print(' ')
print(a + b)
print('Shape of a + b:',(a+b).shape)


[[0]
 [1]
 [2]]
Shape of a: (3, 1)
 
[0 1 2]
Shape of b: (3,)
 
[[0 1 2]
 [1 2 3]
 [2 3 4]]
Shape of a + b: (3, 3)

In [129]:
# Broadcasting example 3
M = np.ones((3,2))
print(M)
print('Shape of M:', M.shape)
print('')
a = np.arange(3)
print(a)
print('Shape of a:', a.shape)
print('')
print("M + a throws an error. These arrays are incompatible.")


[[ 1.  1.]
 [ 1.  1.]
 [ 1.  1.]]
Shape of M: (3, 2)

[0 1 2]
Shape of a: (3,)

M + a throws an error. These arrays are incompatible.

In [130]:
# Broadcasting in practice
X = np.random.randint(0, 5,size=(3, 3))
X


Out[130]:
array([[3, 2, 2],
       [1, 2, 3],
       [0, 4, 0]])

In [131]:
# mean of each feature (column): axis=0 averages down the rows
Xmean = X.mean(axis=0)
Xmean


Out[131]:
array([ 1.33333333,  2.66666667,  1.66666667])

In [132]:
Xcentered = Xmean - X
Xcentered


Out[132]:
array([[-1.66666667,  0.66666667, -0.33333333],
       [ 0.33333333,  0.66666667, -1.33333333],
       [ 1.33333333, -1.33333333,  1.66666667]])

In [133]:
Xcentered.mean(0)


Out[133]:
array([ -7.40148683e-17,  -1.48029737e-16,   7.40148683e-17])

In [134]:
# plotting two dimensional function z = f(x, y)
x = np.linspace(0, 5, 50)
y = np.linspace(0, 5, 50)[:,np.newaxis]

z = np.sin(x) ** 10 + np.cos(10 + y * x) * np.cos(x)

#plot the function
plt.imshow(z, origin='lower', extent=[0,5,0,5], cmap='viridis')
plt.colorbar();



In [135]:
# daily precipitation column of the Seattle 2014 dataset
rainfall = pd.read_csv('data/Seattle2014.csv')['PRCP']
inches = rainfall / 254 # presumably PRCP is in tenths of a millimetre (254 tenths of a mm = 1 inch) — TODO confirm
inches.shape  # 365 days in a year


Out[135]:
(365,)

In [136]:
plt.hist(inches, bins=40);



In [137]:
#working with 2-dimensional array
f = np.random.randint(10, size=(3,4))
f


Out[137]:
array([[6, 4, 9, 2],
       [2, 4, 1, 2],
       [7, 4, 4, 1]])

In [138]:
np.count_nonzero(f < 6)


Out[138]:
9

In [139]:
# this code and the code above give the same result; here True counts as 1 and False as 0
np.sum(f < 6)


Out[139]:
9

In [140]:
# how many values less than 3 for each row
np.sum( f < 3, axis = 1)


Out[140]:
array([1, 3, 1])

In [141]:
# np.any() , np.all()
np.any(f == 0 ) , np.all(f==0)


Out[141]:
(False, False)

In [142]:
np.any(f > 5 , axis=1)


Out[142]:
array([ True, False,  True], dtype=bool)

In [143]:
# go back to the Seattle rainy days data

# how many days had more than 0.5 inches but less than 1 inch of rain
np.sum((inches > 0.5) & (inches < 1))


Out[143]:
29

In [144]:
# Other examples
print("Number of days without rain:", np.sum(inches == 0))
print("Number of days with rain:", np.sum(inches != 0))
print("Days with more than 0.5 inches:", np.sum(inches > 0.5) )
print("Rainy days with < 0.1 inches:", np.sum((inches < 0.2) & (inches != 0))) 
#0.2 equals 0.1 inches


Number of days without rain: 215
Number of days with rain: 150
Days with more than 0.5 inches: 37
Rainy days with < 0.1 inches: 75

In [145]:
X


Out[145]:
array([[3, 2, 2],
       [1, 2, 3],
       [0, 4, 0]])

In [146]:
# less than 3 of all values
print(X < 3)
print('Return is one dimensional array')
print(X[X < 3])


[[False  True  True]
 [ True  True False]
 [ True False  True]]
Return is one dimensional array
[2 2 1 2 0 0]

In [147]:
lessThan3 = X[X < 3]
lessThan3


Out[147]:
array([2, 2, 1, 2, 0, 0])

In [148]:
# we can do statistics on this new array
np.mean(lessThan3), np.min(lessThan3)


Out[148]:
(1.1666666666666667, 0)

In [149]:
# Back to the Seattle rain data.
# Boolean mask: days with any precipitation
rainy = inches > 0
# Boolean mask: the 89 day indices after index 172 (June 21st), i.e. 173 through 261
summer = (np.arange(365) > 172) & (np.arange(365) < 172 + 90)

In [150]:
print("Median precip on rainy days:", np.median(inches[rainy]))
print("Median precip on summer days:", np.median(inches[summer]))
print("Max precip on rainy days:", np.max(inches[rainy]))
print("Max precip on summer days:", np.max(inches[summer]))
print("Median precip on non-summer rainy days:", np.median(inches[rainy & ~summer]))


Median precip on rainy days: 0.194881889764
Median precip on summer days: 0.0
Max precip on rainy days: 1.83858267717
Max precip on summer days: 0.850393700787
Median precip on non-summer rainy days: 0.200787401575

In [151]:
t = np.arange(10)
t


Out[151]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [152]:
t[(t > 3 ) & (t < 6)]


Out[152]:
array([4, 5])

Fancy Indexing


In [153]:
rand = np.random.RandomState(42)
r = rand.randint(100, size=10)
r


Out[153]:
array([51, 92, 14, 71, 60, 20, 82, 86, 74, 74])

In [154]:
# instead of using 
r[2], r[5], r[7]


Out[154]:
(14, 20, 86)

In [155]:
# pass a single list or array of indices  
ind= [2, 5, 7]
r[ind]


Out[155]:
array([14, 20, 86])

In [156]:
ind2 = np.array([[3,7],
                [4,5]])
r[ind2]


Out[156]:
array([[71, 86],
       [60, 20]])

In [157]:
# combined indexing
Y = np.arange(12).reshape(3,4)
Y


Out[157]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [158]:
# fancy and simple indices
Y[2, [2, 0, 1]]


Out[158]:
array([10,  8,  9])

In [159]:
# fancy indexing with slicing
Y[1:,[1,2]]


Out[159]:
array([[ 5,  6],
       [ 9, 10]])

Selecting random points


In [160]:
mean=[0,0]
cov=[[1,2],
     [2,5]]
X = rand.multivariate_normal(mean, cov, 100)
X.shape


Out[160]:
(100, 2)

In [161]:
X.shape[0]


Out[161]:
100

In [162]:
X[:5]


Out[162]:
array([[-0.644508  , -0.46220608],
       [ 0.7376352 ,  1.21236921],
       [ 0.88151763,  1.12795177],
       [ 2.04998983,  5.97778598],
       [-0.1711348 , -2.06258746]])

In [163]:
plt.scatter(X[:,0], X[:, 1]);



In [164]:
# select 20 random points 
indices = np.random.choice(X.shape[0], 20, replace=False)
indices


Out[164]:
array([41, 24, 79, 26, 38, 18,  2, 64, 67, 25, 13, 17, 14, 37, 31, 91, 80,
       98, 66, 92])

In [165]:
selection = X[indices] #fancy indexing here
selection, selection.shape


Out[165]:
(array([[-1.33461668, -3.03203218],
        [ 0.4299908 ,  0.36324254],
        [-0.31332021, -1.7895623 ],
        [ 0.32124996,  0.33137032],
        [ 0.35936721,  0.40554974],
        [ 1.53817376,  1.99973464],
        [ 0.88151763,  1.12795177],
        [ 0.13143756, -0.07833855],
        [-0.51172373, -1.40960773],
        [ 0.97253528,  3.53815717],
        [ 0.49514263,  1.18939673],
        [-0.53943081, -0.3478899 ],
        [ 0.0629872 ,  0.57349278],
        [ 0.44457363,  1.87828298],
        [-1.51101746, -3.2818741 ],
        [-0.16863279,  0.39422355],
        [ 1.12659538,  1.49627535],
        [-0.99658689, -2.35038099],
        [-0.14547282, -1.34125678],
        [ 0.12065651,  1.13236323]]), (20, 2))

In [166]:
# all 100 points, faded
plt.scatter(X[:,0], X[:,1], alpha=0.3)
# circle the selected points with hollow markers: the face colour must be the
# string 'none' (facecolor=None just means "use the default", i.e. a filled
# marker that hides the point underneath); the edge colour is given explicitly
# so the outline is drawn regardless of the Matplotlib default
plt.scatter(selection[:,0], selection[:,1], s=200, facecolor='none', edgecolor='black');



In [167]:
# modifying values with fancy indexing
x = np.arange(10)
i = np.array([2,1,8,4])
x[i] = 99
x


Out[167]:
array([ 0, 99, 99,  3, 99,  5,  6,  7, 99,  9])

In [168]:
x[i] -=10
x


Out[168]:
array([ 0, 89, 89,  3, 89,  5,  6,  7, 89,  9])

In [169]:
# np.add.at applies the addition once per entry of i, so repeated indices
# would accumulate (unlike n[i] += 1); the indices used here are all distinct
n = np.zeros(10)
i = np.array([2,1,8,4])
np.add.at(n, i, 1)
n


Out[169]:
array([ 0.,  1.,  1.,  0.,  1.,  0.,  0.,  0.,  1.,  0.])

In [170]:
# Binning data
np.random.seed(42)
x = np.random.randn(100)

# compute a histogram by hand
bins = np.linspace(-5, 5, 20)
counts = np.zeros_like(bins)

# find the appropriate bin for each x
i = np.searchsorted(bins, x)

# add 1 to each of these bins
np.add.at(counts, i, 1)

# plot the result; the step shape is a draw style, so it is passed as drawstyle=
# (current Matplotlib releases do not accept 'steps' as a linestyle)
plt.plot(bins, counts, drawstyle='steps');



In [171]:
# this does the same as the cell above
plt.hist(x, bins, histtype='step');



In [172]:
# matplotlib uses the np.histogram function to create this chart.
# let's compare both
print("NumPy routine:")
%timeit counts, edges = np.histogram(x, bins)
print('')
print('Custom routine:')
# NOTE(review): np.add.at updates `counts` in place, so every %timeit repetition
# adds to it again and `counts` no longer holds the histogram after this cell
%timeit np.add.at(counts, np.searchsorted(bins,x), 1)


NumPy routine:
87.5 µs ± 17.7 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

Custom routine:
17.7 µs ± 757 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

In [173]:
# np.histogram?? ##for more information

Sorting Arrays


In [174]:
x = np.array([1,7,8,3,9,11])
np.sort(x)


Out[174]:
array([ 1,  3,  7,  8,  9, 11])

In [175]:
# if you prefer to sort array in-place, use .sort method  
print("x before:", x)
x.sort()
print("x after sorted in place:", x)


x before: [ 1  7  8  3  9 11]
x after sorted in place: [ 1  3  7  8  9 11]

In [176]:
# argsort returns the indices that would sort the array
y = np.array([3,5,8,1,6])
i = np.argsort(y)
i


Out[176]:
array([3, 0, 1, 4, 2])

In [177]:
# these indices can then be used with fancy indexing to build the sorted array
y[i]


Out[177]:
array([1, 3, 5, 6, 8])

In [178]:
# sorting along rows and columns
rand = np.random.RandomState(42)
X = rand.randint(0, 10, (4,6))
X


Out[178]:
array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [179]:
#sort each column of X
np.sort(X, axis=0)


Out[179]:
array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [180]:
# sort each row of X
np.sort(X, axis =1)


Out[180]:
array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

In [186]:
# partial sorting
# np.partition(x, 3) moves the three smallest values (in no particular order)
# into the first three positions of the result
x = np.array([11,3,6,70,4,1, 99])
np.partition(x, 3)


Out[186]:
array([ 3,  1,  4,  6, 11, 70, 99])

Example: k-Nearest Neighbors


In [192]:
# create 10 random points in two dimensions (a 10x2 integer array)
X = rand.randint(0, 11,(10,2))
X


Out[192]:
array([[3, 7],
       [6, 8],
       [7, 4],
       [1, 4],
       [7, 9],
       [8, 8],
       [0, 8],
       [6, 8],
       [7, 0],
       [7, 7]])

In [193]:
plt.scatter(X[:,0], X[:,1], s=100);



In [200]:
X[:, np.newaxis,:] - X[np.newaxis, :, :]


Out[200]:
array([[[ 0,  0],
        [-3, -1],
        [-4,  3],
        [ 2,  3],
        [-4, -2],
        [-5, -1],
        [ 3, -1],
        [-3, -1],
        [-4,  7],
        [-4,  0]],

       [[ 3,  1],
        [ 0,  0],
        [-1,  4],
        [ 5,  4],
        [-1, -1],
        [-2,  0],
        [ 6,  0],
        [ 0,  0],
        [-1,  8],
        [-1,  1]],

       [[ 4, -3],
        [ 1, -4],
        [ 0,  0],
        [ 6,  0],
        [ 0, -5],
        [-1, -4],
        [ 7, -4],
        [ 1, -4],
        [ 0,  4],
        [ 0, -3]],

       [[-2, -3],
        [-5, -4],
        [-6,  0],
        [ 0,  0],
        [-6, -5],
        [-7, -4],
        [ 1, -4],
        [-5, -4],
        [-6,  4],
        [-6, -3]],

       [[ 4,  2],
        [ 1,  1],
        [ 0,  5],
        [ 6,  5],
        [ 0,  0],
        [-1,  1],
        [ 7,  1],
        [ 1,  1],
        [ 0,  9],
        [ 0,  2]],

       [[ 5,  1],
        [ 2,  0],
        [ 1,  4],
        [ 7,  4],
        [ 1, -1],
        [ 0,  0],
        [ 8,  0],
        [ 2,  0],
        [ 1,  8],
        [ 1,  1]],

       [[-3,  1],
        [-6,  0],
        [-7,  4],
        [-1,  4],
        [-7, -1],
        [-8,  0],
        [ 0,  0],
        [-6,  0],
        [-7,  8],
        [-7,  1]],

       [[ 3,  1],
        [ 0,  0],
        [-1,  4],
        [ 5,  4],
        [-1, -1],
        [-2,  0],
        [ 6,  0],
        [ 0,  0],
        [-1,  8],
        [-1,  1]],

       [[ 4, -7],
        [ 1, -8],
        [ 0, -4],
        [ 6, -4],
        [ 0, -9],
        [-1, -8],
        [ 7, -8],
        [ 1, -8],
        [ 0,  0],
        [ 0, -7]],

       [[ 4,  0],
        [ 1, -1],
        [ 0,  3],
        [ 6,  3],
        [ 0, -2],
        [-1, -1],
        [ 7, -1],
        [ 1, -1],
        [ 0,  7],
        [ 0,  0]]])

In [ ]:


In [ ]:


In [ ]:


In [ ]: