In [1]:

    
import numpy
print("numpy version: ",numpy.__version__)
!python --version









    



numpy version:  1.11.3
Python 3.6.0 :: Anaconda custom (x86_64)

Python lists



In [2]:

    
# python list holds many python objects
L = list(range(10))
L, type(L), type(L[0])









    Out[2]:





([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], list, int)



In [3]:

    
L2 = list(str(i) for i in L)
L2, type(L2), type(L2[0])









    Out[3]:





(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], list, str)



In [4]:

    
#because of python's dynamic typing, it can hold heterogenous lists
L3 = [True, "2", 3.0, 4]
[type(item) for item in L3]









    Out[4]:





[bool, str, float, int]



In [5]:

    
#built-in array module in ptyhon since 3.3
import array
L  = list(range(10))
A = array.array('i', L)
A









    Out[5]:





array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

Creating Numpy arrays

1.np.array to create numpy array from python list



In [6]:

    
import numpy as np
Pylist = [1, 2, 3]



In [7]:

    
NumpyList = np.array(Pylist)



In [8]:

    
NumpyList









    Out[8]:





array([1, 2, 3])



In [9]:

    
type(Pylist) , type(NumpyList)









    Out[9]:





(list, numpy.ndarray)



In [10]:

    
# declare the type of list 
np.array([1,2,3,4], dtype=float)









    Out[10]:





array([ 1.,  2.,  3.,  4.])



In [11]:

    
# multidimensional numpy array
np.array([range(i, i+3) for i in [2,4,6]])









    Out[11]:





array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

2.from scratch



In [12]:

    
# create zeros
np.zeros(3, dtype=int)









    Out[12]:





array([0, 0, 0])



In [13]:

    
# create 3x5 array filled with 1s
np.ones((3,5), dtype=float)









    Out[13]:





array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])



In [14]:

    
# create an array of 5s 
np.full((2,4), 5, dtype=float)









    Out[14]:





array([[ 5.,  5.,  5.,  5.],
       [ 5.,  5.,  5.,  5.]])



In [15]:

    
#array of linear sequence from 4 to 10 
np.arange(4, 10, 2)









    Out[15]:





array([4, 6, 8])



In [16]:

    
# array of evenly spaced values
np.linspace(0, 1, 5)









    Out[16]:





array([ 0.  ,  0.25,  0.5 ,  0.75,  1.  ])



In [17]:

    
# uniformly distributed random values between 0 and 1
np.random.random((3,4))









    Out[17]:





array([[ 0.78772728,  0.43475258,  0.80173387,  0.36467609],
       [ 0.90600859,  0.69279576,  0.63677792,  0.58005333],
       [ 0.29011891,  0.63721852,  0.00373535,  0.36661782]])



In [18]:

    
# normally distributed values mean=0, std= 1
np.random.normal(0,1, (2,3))









    Out[18]:





array([[-2.06895387,  1.33122329,  0.40084701],
       [-0.45159799, -1.480771  ,  0.11710757]])



In [19]:

    
# random integer, interval [0, 10)
np.random.randint(0, 10, (2,5))









    Out[19]:





array([[1, 2, 3, 6, 2],
       [4, 2, 2, 3, 5]])



In [20]:

    
# identity matrix
np.eye(3)









    Out[20]:





array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])



In [21]:

    
np.random.random_sample((3,4))









    Out[21]:





array([[ 0.41257038,  0.70840527,  0.55898304,  0.96145733],
       [ 0.79404544,  0.83858273,  0.9691193 ,  0.23643289],
       [ 0.76567086,  0.8344795 ,  0.54639659,  0.37024142]])



In [22]:

    
#creare an uninitialized array of three integers. the values will be whatever happens
#already exists in that memory location
np.empty(3)









    Out[22]:





array([ 1.,  1.,  1.])

Basics of Numpy Arrays

a. Attributes of arrays



In [23]:

    
import numpy as np
np.random.seed(0) #to generate the same random array each time this code run
x1 = np.random.randint(10, size=6) #one dimensional array
x2 = np.random.randint(10, size=(3,4)) #two dimensional array
x3 = np.random.randint(10, size=(3,4,5)) #three dimensional array



In [24]:

    
x1









    Out[24]:





array([5, 0, 3, 3, 7, 9])



In [25]:

    
x2









    Out[25]:





array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])



In [26]:

    
x3









    Out[26]:





array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])



In [27]:

    
print("x3 ndim:", x3.ndim)
print("x3 shape:", x3.shape)
print("x3 size:", x3.size)
print("x3 data:", x3.data)
print("x3 dtype:", x3.dtype)
print("x3 item:", x3.item)
print("x3 itemsize:", x3.itemsize, "bytes")
print("x3 nbytes:", x3.nbytes, "bytes")









    



x3 ndim: 3
x3 shape: (3, 4, 5)
x3 size: 60
x3 data: <memory at 0x10db8bb88>
x3 dtype: int64
x3 item: <built-in method item of numpy.ndarray object at 0x112f9f260>
x3 itemsize: 8 bytes
x3 nbytes: 480 bytes

b. Array indexing



In [28]:

    
x1









    Out[28]:





array([5, 0, 3, 3, 7, 9])



In [29]:

    
x1[4]









    Out[29]:





7



In [30]:

    
x1[-1]









    Out[30]:





9



In [31]:

    
x2









    Out[31]:





array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])



In [32]:

    
x2[0, 0]









    Out[32]:





3



In [33]:

    
# colon is for slicing. see the difference above and below.
x2[0:1]









    Out[33]:





array([[3, 5, 2, 4]])



In [34]:

    
x2[1,1]









    Out[34]:





6



In [35]:

    
x2[2,-3]









    Out[35]:





6



In [36]:

    
# change the values in the array
x2[2,0] = 7
x2[2,1] = 7



In [37]:

    
x2









    Out[37]:





array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [7, 7, 7, 7]])

c. Array slicing x[start : stop : step] default values 0



In [38]:

    
x = np.arange(10)
x









    Out[38]:





array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])



In [39]:

    
x[:5] #first 5 elements









    Out[39]:





array([0, 1, 2, 3, 4])



In [40]:

    
x[::2] #every other element









    Out[40]:





array([0, 2, 4, 6, 8])



In [41]:

    
x[1::2] #every other element starting from 1









    Out[41]:





array([1, 3, 5, 7, 9])



In [42]:

    
np.arange(1,10, 2) #example of arange method









    Out[42]:





array([1, 3, 5, 7, 9])



In [43]:

    
x[::-1] #all elements, reversed









    Out[43]:





array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])



In [44]:

    
# Multidimensional subarrays
x2









    Out[44]:





array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [7, 7, 7, 7]])



In [45]:

    
x2[0] #first row









    Out[45]:





array([3, 5, 2, 4])



In [46]:

    
x2[2] #third row









    Out[46]:





array([7, 7, 7, 7])



In [47]:

    
x2[2:] == x2[2]









    Out[47]:





array([[ True,  True,  True,  True]], dtype=bool)



In [48]:

    
x2[1:2, 1:3] #second row intersection between second and third column









    Out[48]:





array([[6, 8]])



In [49]:

    
x2[::-1, ::-1] #can be reversed









    Out[49]:





array([[7, 7, 7, 7],
       [8, 8, 6, 7],
       [4, 2, 5, 3]])



In [50]:

    
x2[:,0] #first column









    Out[50]:





array([3, 7, 7])



In [51]:

    
# copying feature 
x2_sub = x2[:2 , :2]
x2_sub









    Out[51]:





array([[3, 5],
       [7, 6]])



In [52]:

    
x2_sub[0,0] = 61
x2_sub









    Out[52]:





array([[61,  5],
       [ 7,  6]])



In [53]:

    
# modifying subarray affects the original array as well
x2









    Out[53]:





array([[61,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 7,  7,  7,  7]])



In [54]:

    
#use copy method to keep the original array not updated
x2_sub_copy = x2[:2, :2].copy()
x2_sub_copy









    Out[54]:





array([[61,  5],
       [ 7,  6]])



In [55]:

    
x2_sub_copy[0,0] = 99
x2_sub_copy









    Out[55]:





array([[99,  5],
       [ 7,  6]])



In [56]:

    
x2









    Out[56]:





array([[61,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 7,  7,  7,  7]])

d. Reshaping of arrays



In [57]:

    
# use reshape method 
grid = np.arange(1,10).reshape((3,3))
grid









    Out[57]:





array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])



In [58]:

    
# convert one dimensional array into two dimensional row or column matrix
y = np.array([1,2,3])
y









    Out[58]:





array([1, 2, 3])



In [59]:

    
# row vector via reshape
y.reshape((1,3))









    Out[59]:





array([[1, 2, 3]])



In [60]:

    
#row vector via newaxis  
y[np.newaxis, :]









    Out[60]:





array([[1, 2, 3]])



In [61]:

    
# column vector via reshape
y.reshape((3,1))









    Out[61]:





array([[1],
       [2],
       [3]])



In [62]:

    
#column vector via newaxis 
y[:, np.newaxis]









    Out[62]:





array([[1],
       [2],
       [3]])

e. Array concatination and splitting



In [63]:

    
x = np.array([1,2,3])
y = np.array([1,2,3])
z = np.array([61,61,61])



In [64]:

    
# use np.concatinate
np.concatenate([x,y,z])









    Out[64]:





array([ 1,  2,  3,  1,  2,  3, 61, 61, 61])



In [65]:

    
# concat two dimensional arrays
grid









    Out[65]:





array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])



In [66]:

    
np.concatenate([grid, grid])









    Out[66]:





array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9],
       [1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])



In [67]:

    
np.concatenate([grid, grid], axis = 1)









    Out[67]:





array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6],
       [7, 8, 9, 7, 8, 9]])



In [68]:

    
# vertical stack with vstack
np.vstack([x, grid])









    Out[68]:





array([[1, 2, 3],
       [1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])



In [69]:

    
k = np.array([[99], [99], [99]])
k









    Out[69]:





array([[99],
       [99],
       [99]])



In [70]:

    
# horizontal stack with hstack
np.hstack([k, grid])









    Out[70]:





array([[99,  1,  2,  3],
       [99,  4,  5,  6],
       [99,  7,  8,  9]])



In [71]:

    
# splitting of arrays
sp = np.arange(10)
sp









    Out[71]:





array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])



In [72]:

    
x1, x2, x3 = np.split(sp, [1, 3])
x1 , x2, x3









    Out[72]:





(array([0]), array([1, 2]), array([3, 4, 5, 6, 7, 8, 9]))



In [73]:

    
four = np.arange(16).reshape((4,4))
four









    Out[73]:





array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])



In [74]:

    
f1, f2 = np.vsplit(four,[2] )
f1, f2









    Out[74]:





(array([[0, 1, 2, 3],
        [4, 5, 6, 7]]), array([[ 8,  9, 10, 11],
        [12, 13, 14, 15]]))

Introducing Ufuncs



In [75]:

    
import numpy as np
np.random.seed(0)

def compute_reciprocal(values):
    output = np.empty(len(values))
    for i in range(len(values)):
        output[i] = 1.0 / values[i]
    return output



In [76]:

    
values = np.random.randint(1,10, size=5)
compute_reciprocal(values)









    Out[76]:





array([ 0.16666667,  1.        ,  0.25      ,  0.25      ,  0.125     ])



In [77]:

    
big_array = np.random.randint(1, 100 , size=1000000)
%timeit compute_reciprocal(big_array)









    



2.22 s ± 31.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)



In [78]:

    
print(compute_reciprocal(values))
print(1.0 / values)









    



[ 0.16666667  1.          0.25        0.25        0.125     ]
[ 0.16666667  1.          0.25        0.25        0.125     ]



In [79]:

    
%timeit (1.0 / values)









    



1.78 µs ± 22.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)



In [80]:

    
# array arithmetic
x = np.arange(7)
print("x     = ", x)
print("x + 2 = ", x +2)
print("x * 2 = ", x * 2)
print("x / 2 = ", x / 2)
print("x //2 =", x // 2) #floor devision









    



x     =  [0 1 2 3 4 5 6]
x + 2 =  [2 3 4 5 6 7 8]
x * 2 =  [ 0  2  4  6  8 10 12]
x / 2 =  [ 0.   0.5  1.   1.5  2.   2.5  3. ]
x //2 = [0 0 1 1 2 2 3]



In [81]:

    
# more operations
((x ** 2) + 2 ) * (-x)









    Out[81]:





array([   0,   -3,  -12,  -33,  -72, -135, -228])



In [82]:

    
# arithmetic operations implemented in numpy
np.multiply(np.add( np.power(x, 2), 2 ), np.negative(x))









    Out[82]:





array([   0,   -3,  -12,  -33,  -72, -135, -228])



In [83]:

    
# absolute value
z = np.array([-5, -2, 0, 1])
abs(z)









    Out[83]:





array([5, 2, 0, 1])



In [84]:

    
np.absolute(z)









    Out[84]:





array([5, 2, 0, 1])



In [85]:

    
np.abs(z)









    Out[85]:





array([5, 2, 0, 1])



In [86]:

    
np.absolute(z) == np.abs(z)









    Out[86]:





array([ True,  True,  True,  True], dtype=bool)



In [87]:

    
# trigonometric functions
theta = np.linspace(0, np.pi, 3)
theta









    Out[87]:





array([ 0.        ,  1.57079633,  3.14159265])



In [88]:

    
print(np.sin(theta))
print(np.tan(theta))









    



[  0.00000000e+00   1.00000000e+00   1.22464680e-16]
[  0.00000000e+00   1.63312394e+16  -1.22464680e-16]



In [89]:

    
print(x)
print(np.sin(x))
print(np.cos(x))









    



[0 1 2 3 4 5 6]
[ 0.          0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427
 -0.2794155 ]
[ 1.          0.54030231 -0.41614684 -0.9899925  -0.65364362  0.28366219
  0.96017029]



In [90]:

    
# Exponents and logarithms
print(x)
print(np.power(x, 2))
print(np.power(x, 3))









    



[0 1 2 3 4 5 6]
[ 0  1  4  9 16 25 36]
[  0   1   8  27  64 125 216]



In [91]:

    
from scipy import special
#gamma functions
x = [1, 5, 10]
print("gamma(x) = ", special.gamma(x))
print("ln|gamma(x) =", special.gammaln(x))









    



gamma(x) =  [  1.00000000e+00   2.40000000e+01   3.62880000e+05]
ln|gamma(x) = [  0.           3.17805383  12.80182748]

Advanced Ufunc features



In [92]:

    
x = np.arange(4)
np.multiply(x, 2, out=x)
x









    Out[92]:





array([0, 2, 4, 6])



In [93]:

    
# x and y has to have same amount of elements
x = np.arange(5)
y = np.empty(5)
np.multiply(x, 2, out=y)
y









    Out[93]:





array([ 0.,  2.,  4.,  6.,  8.])



In [94]:

    
k = np.zeros(10)
np.power(2, x, out=k[::2])
k









    Out[94]:





array([  1.,   0.,   2.,   0.,   4.,   0.,   8.,   0.,  16.,   0.])



In [95]:

    
# aggregate
x = np.arange(1, 6)
print(np.add.reduce(x))
print(np.multiply.reduce(x))



In [96]:

    
np.add.accumulate(x)









    Out[96]:





array([ 1,  3,  6, 10, 15])

Aggregations: Min. Max. Std. Median. Mean



In [97]:

    
x = np.random.randint(1, 1000, size=10000000)



In [98]:

    
# summing all values in an array
%timeit sum(x)   #python code
%timeit np.sum(x)  #numpy code









    



938 ms ± 61 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
6.35 ms ± 95.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)



In [99]:

    
# min and max
print(min(x), max(x) ) # python code
print(np.min(x) , np.max(x))



In [100]:

    
# again numpy operates much more quickly
%timeit (min(x), max(x))
%timeit (np.min(x) , np.max(x))









    



1.28 s ± 22.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
15.4 ms ± 207 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)



In [101]:

    
# Multidimensional aggregation
M = np.random.randint(1,5, size=(3,4))
M









    Out[101]:





array([[4, 2, 4, 2],
       [4, 1, 4, 4],
       [1, 4, 3, 3]])



In [102]:

    
# sum of all values
M.sum()









    Out[102]:





36



In [103]:

    
# min of each column
np.min(M, axis=0)









    Out[103]:





array([1, 1, 3, 2])



In [104]:

    
# max of each column
np.max(M, axis=1)









    Out[104]:





array([4, 4, 4])



In [105]:

    
# sum of each row
np.sum(M, axis=1)









    Out[105]:





array([12, 13, 11])

Other aggreagation functions



In [106]:

    
n = [1,3,4]



In [107]:

    
#product of elements
np.prod(n)









    Out[107]:





12



In [108]:

    
np.std(n)









    Out[108]:





1.247219128924647



In [109]:

    
np.min(n), np.max(n)









    Out[109]:





(1, 4)



In [110]:

    
# index of min and max
np.argmin(n), np.argmax(n)









    Out[110]:





(0, 2)



In [111]:

    
np.median(n), np.mean(n)









    Out[111]:





(3.0, 2.6666666666666665)

Example



In [112]:

    
!head -4 data/president_heights.csv









    



order,name,height(cm)
1,George Washington,189
2,John Adams,170
3,Thomas Jefferson,189



In [113]:

    
import pandas as pd
data = pd.read_csv('data/president_heights.csv', index_col='order')
data.head()









    Out[113]:






  
    
      
      name
      height(cm)
    
    
      order
      
      
    
  
  
    
      1
      George Washington
      189
    
    
      2
      John Adams
      170
    
    
      3
      Thomas Jefferson
      189
    
    
      4
      James Madison
      163
    
    
      5
      James Monroe
      183



In [114]:

    
data.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 42 entries, 1 to 44
Data columns (total 2 columns):
name          42 non-null object
height(cm)    42 non-null int64
dtypes: int64(1), object(1)
memory usage: 1008.0+ bytes



In [115]:

    
data.describe()



In [116]:

    
heights = np.array(data['height(cm)'])
len(heights)









    Out[116]:





42



In [117]:

    
print("Mean height:", np.mean(heights))
print("Std of heigth:",np.std(heights, ddof=1))
print('min of height:', np.min(heights))
print('max of heights:', np.max(heights))
print('median of heights:', np.median(heights))









    



Mean height: 179.738095238
Std of heigth: 7.01586885536
min of height: 163
max of heights: 193
median of heights: 182.0



In [118]:

    
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')



In [119]:

    
plt.figure(figsize=(10,5) )
plt.hist(heights)
plt.title('Height distribution of US Presidents')
plt.xlabel('height (cm)')
plt.ylabel('number')
plt.show()

Broadcasting



In [120]:

    
a = np.array([0, 1, 2])
b = np.array([5, 5, 5])
a + b









    Out[120]:





array([5, 6, 7])



In [121]:

    
a + 5









    Out[121]:





array([5, 6, 7])



In [122]:

    
M = np.ones((3, 3))
M









    Out[122]:





array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])



In [123]:

    
M + a









    Out[123]:





array([[ 1.,  2.,  3.],
       [ 1.,  2.,  3.],
       [ 1.,  2.,  3.]])



In [124]:

    
x = np.arange(3)
y = np.arange(3)[:,np.newaxis]



In [125]:

    
print(x)
print(y)









    



[0 1 2]
[[0]
 [1]
 [2]]



In [126]:

    
x + y









    Out[126]:





array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])



In [127]:

    
#Broadcasting example 1
M = np.ones((2,3))
a = np.arange(3)

print("Shape of M:", M.shape)
print(M)
print(' ')
print("shape of a:",a.shape) 
print(a)
print('')
print("shape of M + a:", (M+a).shape) 
print(M + a)









    



Shape of M: (2, 3)
[[ 1.  1.  1.]
 [ 1.  1.  1.]]
 
shape of a: (3,)
[0 1 2]

shape of M + a: (2, 3)
[[ 1.  2.  3.]
 [ 1.  2.  3.]]



In [128]:

    
# Broadcasting example 2
a = np.arange(3).reshape((3,1))
print(a)
print('Shape of a:', a.shape)
print(' ')
b= np.arange(3)
print(b)
print('Shape of b:',b.shape)
print(' ')
print(a + b)
print('Shape of a + b:',(a+b).shape)









    



[[0]
 [1]
 [2]]
Shape of a: (3, 1)
 
[0 1 2]
Shape of b: (3,)
 
[[0 1 2]
 [1 2 3]
 [2 3 4]]
Shape of a + b: (3, 3)



In [129]:

    
# Broadcasting example 3
M = np.ones((3,2))
print(M)
print('Shape of M:', M.shape)
print('')
a = np.arange(3)
print(a)
print('Shape of a:', a.shape)
print('')
print("M + a throws an error. These arrays are incompatible.")









    



[[ 1.  1.]
 [ 1.  1.]
 [ 1.  1.]]
Shape of M: (3, 2)

[0 1 2]
Shape of a: (3,)

M + a throws an error. These arrays are incompatible.



In [130]:

    
# Broadcasting in practice
X = np.random.randint(0, 5,size=(3, 3))
X









    Out[130]:





array([[3, 2, 2],
       [1, 2, 3],
       [0, 4, 0]])



In [131]:

    
#mean of each feature (row)
Xmean = X.mean(axis=0)
Xmean









    Out[131]:





array([ 1.33333333,  2.66666667,  1.66666667])



In [132]:

    
Xcentered = Xmean - X
Xcentered









    Out[132]:





array([[-1.66666667,  0.66666667, -0.33333333],
       [ 0.33333333,  0.66666667, -1.33333333],
       [ 1.33333333, -1.33333333,  1.66666667]])



In [133]:

    
Xcentered.mean(0)









    Out[133]:





array([ -7.40148683e-17,  -1.48029737e-16,   7.40148683e-17])



In [134]:

    
# plotting two dimensional function z = f(x, y)
x = np.linspace(0, 5, 50)
y = np.linspace(0, 5, 50)[:,np.newaxis]

z = np.sin(x) ** 10 + np.cos(10 + y * x) * np.cos(x)

#plot the function
plt.imshow(z, origin='lower', extent=[0,5,0,5], cmap='viridis')
plt.colorbar();



In [135]:

    
rainfall = pd.read_csv('data/Seattle2014.csv')['PRCP']
inches = rainfall / 254 #1/10mm = 1 inch
inches.shape  # 365 days in a year









    Out[135]:





(365,)



In [136]:

    
plt.hist(inches, bins=40);



In [137]:

    
#working with 2-dimensional array
f = np.random.randint(10, size=(3,4))
f









    Out[137]:





array([[6, 4, 9, 2],
       [2, 4, 1, 2],
       [7, 4, 4, 1]])



In [138]:

    
np.count_nonzero(f < 6)









    Out[138]:





9



In [139]:

    
# this code and cod above result the same. in this case, True=1 , False =0
np.sum(f < 6)









    Out[139]:





9



In [140]:

    
# how many values less than 3 for each row
np.sum( f < 3, axis = 1)









    Out[140]:





array([1, 3, 1])



In [141]:

    
# np.any() , np.all()
np.any(f == 0 ) , np.all(f==0)









    Out[141]:





(False, False)



In [142]:

    
np.any(f > 5 , axis=1)









    Out[142]:





array([ True, False,  True], dtype=bool)



In [143]:

    
# go back to Seattlle rainy days data

# how many days rained less than 4 inches and greater than 1 inch
np.sum((inches > 0.5) & (inches < 1))









    Out[143]:





29



In [144]:

    
# Other examples
print("Number of days without rain:", np.sum(inches == 0))
print("Number of days with rain:", np.sum(inches != 0))
print("Days with more than 0.5 inches:", np.sum(inches > 0.5) )
print("Rainy days with < 0.1 inches:", np.sum((inches < 0.2) & (inches != 0))) 
#0.2 equals 0.1 inches









    



Number of days without rain: 215
Number of days with rain: 150
Days with more than 0.5 inches: 37
Rainy days with < 0.1 inches: 75



In [145]:

    
X









    Out[145]:





array([[3, 2, 2],
       [1, 2, 3],
       [0, 4, 0]])



In [146]:

    
# less than 3 of all values
print(X < 3)
print('Return is one dimensional array')
print(X[X < 3])









    



[[False  True  True]
 [ True  True False]
 [ True False  True]]
Return is one dimensional array
[2 2 1 2 0 0]



In [147]:

    
lessThan3 = X[X < 3]
lessThan3









    Out[147]:





array([2, 2, 1, 2, 0, 0])



In [148]:

    
# we can do statisctics on this new array
np.mean(lessThan3), np.min(lessThan3)









    Out[148]:





(1.1666666666666667, 0)



In [149]:

    
# go back to Seattle rain data. 
# rainy days mask
rainy = inches > 0
#summer days june 21th 172. day
summer = (np.arange(365) - 172 < 90) & (np.arange(365) - 172 > 0)



In [150]:

    
print("Median precip on rainy days:", np.median(inches[rainy]))
print("Median precip on summer days:", np.median(inches[summer]))
print("Max precip on rainy days:", np.max(inches[rainy]))
print("Max precip on summer days:", np.max(inches[summer]))
print("Median precip on non-summer rainy days:", np.median(inches[rainy & ~summer]))









    



Median precip on rainy days: 0.194881889764
Median precip on summer days: 0.0
Max precip on rainy days: 1.83858267717
Max precip on summer days: 0.850393700787
Median precip on non-summer rainy days: 0.200787401575



In [151]:

    
t = np.arange(10)
t









    Out[151]:





array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])



In [152]:

    
t[(t > 3 ) & (t < 6)]









    Out[152]:





array([4, 5])

Fancy Indexing



In [153]:

    
rand = np.random.RandomState(42)
r = rand.randint(100, size=10)
r









    Out[153]:





array([51, 92, 14, 71, 60, 20, 82, 86, 74, 74])



In [154]:

    
# instead of using 
r[2], r[5], r[7]









    Out[154]:





(14, 20, 86)



In [155]:

    
# pass a single list or array of indices  
ind= [2, 5, 7]
r[ind]









    Out[155]:





array([14, 20, 86])



In [156]:

    
ind2 = np.array([[3,7],
                [4,5]])
r[ind2]









    Out[156]:





array([[71, 86],
       [60, 20]])



In [157]:

    
# combined indexing
Y = np.arange(12).reshape(3,4)
Y









    Out[157]:





array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])



In [158]:

    
# fancy and simple indices
Y[2, [2, 0, 1]]









    Out[158]:





array([10,  8,  9])



In [159]:

    
# fancy indexing with slicing
Y[1:,[1,2]]









    Out[159]:





array([[ 5,  6],
       [ 9, 10]])

Selecting random points



In [160]:

    
mean=[0,0]
cov=[[1,2],
     [2,5]]
X = rand.multivariate_normal(mean, cov, 100)
X.shape









    Out[160]:





(100, 2)



In [161]:

    
X.shape[0]









    Out[161]:





100



In [162]:

    
X[:5]









    Out[162]:





array([[-0.644508  , -0.46220608],
       [ 0.7376352 ,  1.21236921],
       [ 0.88151763,  1.12795177],
       [ 2.04998983,  5.97778598],
       [-0.1711348 , -2.06258746]])



In [163]:

    
plt.scatter(X[:,0], X[:, 1]);



In [164]:

    
# select 20 random points 
indices = np.random.choice(X.shape[0], 20, replace=False)
indices









    Out[164]:





array([41, 24, 79, 26, 38, 18,  2, 64, 67, 25, 13, 17, 14, 37, 31, 91, 80,
       98, 66, 92])



In [165]:

    
selection = X[indices] #fancy indexing here
selection, selection.shape









    Out[165]:





(array([[-1.33461668, -3.03203218],
        [ 0.4299908 ,  0.36324254],
        [-0.31332021, -1.7895623 ],
        [ 0.32124996,  0.33137032],
        [ 0.35936721,  0.40554974],
        [ 1.53817376,  1.99973464],
        [ 0.88151763,  1.12795177],
        [ 0.13143756, -0.07833855],
        [-0.51172373, -1.40960773],
        [ 0.97253528,  3.53815717],
        [ 0.49514263,  1.18939673],
        [-0.53943081, -0.3478899 ],
        [ 0.0629872 ,  0.57349278],
        [ 0.44457363,  1.87828298],
        [-1.51101746, -3.2818741 ],
        [-0.16863279,  0.39422355],
        [ 1.12659538,  1.49627535],
        [-0.99658689, -2.35038099],
        [-0.14547282, -1.34125678],
        [ 0.12065651,  1.13236323]]), (20, 2))



In [166]:

    
plt.scatter(X[:,0], X[:,1], alpha=0.3)
plt.scatter(selection[:,0], selection[:,1], s=200,facecolor=None);



In [167]:

    
# modifying values with fancy indexing
x = np.arange(10)
i = np.array([2,1,8,4])
x[i] = 99
x









    Out[167]:





array([ 0, 99, 99,  3, 99,  5,  6,  7, 99,  9])



In [168]:

    
x[i] -=10
x









    Out[168]:





array([ 0, 89, 89,  3, 89,  5,  6,  7, 89,  9])



In [169]:

    
# to fix the nonintuitive result use at() ufunc
n = np.zeros(10)
i = np.array([2,1,8,4])
np.add.at(n, i, 1)
n









    Out[169]:





array([ 0.,  1.,  1.,  0.,  1.,  0.,  0.,  0.,  1.,  0.])



In [170]:

    
# Bining Data
np.random.seed(42)
x = np.random.randn(100)

#compute hist by hand
bins = np.linspace(-5, 5, 20)
counts = np.zeros_like(bins)

#find appropreate bin for each x
i = np.searchsorted(bins, x)

#add 1 each of these bins
np.add.at(counts, i, 1)

#plot the result
plt.plot(bins, counts, linestyle= 'steps');



In [171]:

    
# this deos the same above. 
plt.hist(x, bins, histtype='step');



In [172]:

    
# matplotlib uses np.histogram function to create this chart. 
#lets compare both
print("NumPy routine:")
%timeit counts, edges = np.histogram(x, bins)
print('')
print('Custom routine:')
%timeit np.add.at(counts, np.searchsorted(bins,x), 1)









    



NumPy routine:
87.5 µs ± 17.7 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

Custom routine:
17.7 µs ± 757 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)



In [173]:

    
# np.histogram?? ##for more information

Sorting Arrays



In [174]:

    
x = np.array([1,7,8,3,9,11])
np.sort(x)









    Out[174]:





array([ 1,  3,  7,  8,  9, 11])



In [175]:

    
# if you prefer to sort array in-place, use .sort method  
print("x before:", x)
x.sort()
print("x after sorted in place:", x)









    



x before: [ 1  7  8  3  9 11]
x after sorted in place: [ 1  3  7  8  9 11]



In [176]:

    
# argsort returns indicies for the sorted elements
y = np.array([3,5,8,1,6])
i = np.argsort(y)
i









    Out[176]:





array([3, 0, 1, 4, 2])



In [177]:

    
# you  may use fancy  indexing alter
y[i]









    Out[177]:





array([1, 3, 5, 6, 8])



In [178]:

    
# sorting along rows and columns
rand = np.random.RandomState(42)
X = rand.randint(0, 10, (4,6))
X









    Out[178]:





array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])



In [179]:

    
#sort each column of X
np.sort(X, axis=0)









    Out[179]:





array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])



In [180]:

    
# sort each row of X
np.sort(X, axis =1)









    Out[180]:





array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])



In [186]:

    
# partial sorting
# find the 3 smallest values in the array
x = np.array([11,3,6,70,4,1, 99])
np.partition(x, 3)









    Out[186]:





array([ 3,  1,  4,  6, 11, 70, 99])

Example: k-Nearest Neighbors



In [192]:

    
#create random 10 points on a two dimensional array
X = rand.randint(0, 11,(10,2))
X









    Out[192]:





array([[3, 7],
       [6, 8],
       [7, 4],
       [1, 4],
       [7, 9],
       [8, 8],
       [0, 8],
       [6, 8],
       [7, 0],
       [7, 7]])



In [193]:

    
plt.scatter(X[:,0], X[:,1], s=100);



In [200]:

    
X[:, np.newaxis,:] - X[np.newaxis, :, :]









    Out[200]:





array([[[ 0,  0],
        [-3, -1],
        [-4,  3],
        [ 2,  3],
        [-4, -2],
        [-5, -1],
        [ 3, -1],
        [-3, -1],
        [-4,  7],
        [-4,  0]],

       [[ 3,  1],
        [ 0,  0],
        [-1,  4],
        [ 5,  4],
        [-1, -1],
        [-2,  0],
        [ 6,  0],
        [ 0,  0],
        [-1,  8],
        [-1,  1]],

       [[ 4, -3],
        [ 1, -4],
        [ 0,  0],
        [ 6,  0],
        [ 0, -5],
        [-1, -4],
        [ 7, -4],
        [ 1, -4],
        [ 0,  4],
        [ 0, -3]],

       [[-2, -3],
        [-5, -4],
        [-6,  0],
        [ 0,  0],
        [-6, -5],
        [-7, -4],
        [ 1, -4],
        [-5, -4],
        [-6,  4],
        [-6, -3]],

       [[ 4,  2],
        [ 1,  1],
        [ 0,  5],
        [ 6,  5],
        [ 0,  0],
        [-1,  1],
        [ 7,  1],
        [ 1,  1],
        [ 0,  9],
        [ 0,  2]],

       [[ 5,  1],
        [ 2,  0],
        [ 1,  4],
        [ 7,  4],
        [ 1, -1],
        [ 0,  0],
        [ 8,  0],
        [ 2,  0],
        [ 1,  8],
        [ 1,  1]],

       [[-3,  1],
        [-6,  0],
        [-7,  4],
        [-1,  4],
        [-7, -1],
        [-8,  0],
        [ 0,  0],
        [-6,  0],
        [-7,  8],
        [-7,  1]],

       [[ 3,  1],
        [ 0,  0],
        [-1,  4],
        [ 5,  4],
        [-1, -1],
        [-2,  0],
        [ 6,  0],
        [ 0,  0],
        [-1,  8],
        [-1,  1]],

       [[ 4, -7],
        [ 1, -8],
        [ 0, -4],
        [ 6, -4],
        [ 0, -9],
        [-1, -8],
        [ 7, -8],
        [ 1, -8],
        [ 0,  0],
        [ 0, -7]],

       [[ 4,  0],
        [ 1, -1],
        [ 0,  3],
        [ 6,  3],
        [ 0, -2],
        [-1, -1],
        [ 7, -1],
        [ 1, -1],
        [ 0,  7],
        [ 0,  0]]])



In [ ]:



In [ ]:



In [ ]:



In [ ]:

	height(cm)
count	42.000000
mean	179.738095
std	7.015869
min	163.000000
25%	174.250000
50%	182.000000
75%	183.000000
max	193.000000

	name	height(cm)
order
1	George Washington	189
2	John Adams	170
3	Thomas Jefferson	189
4	James Madison	163
5	James Monroe	183