In [1]:
import numpy
print("numpy version: ",numpy.__version__)
!python --version
In [2]:
# A plain Python list is a container of full Python objects
L = [*range(10)]
L, type(L), type(L[0])
Out[2]:
In [3]:
L2 = list(str(i) for i in L)
L2, type(L2), type(L2[0])
Out[3]:
In [4]:
# Because Python is dynamically typed, a single list can hold heterogeneous items
L3 = [True, "2", 3.0, 4]
[type(item) for item in L3]
Out[4]:
In [5]:
# Python's built-in array module stores values of one fixed type;
# the type code 'i' requests signed integers
import array
L = list(range(10))
A = array.array('i', L)
A
Out[5]:
1. np.array: create a NumPy array from a Python list
In [6]:
import numpy as np
Pylist = [1, 2, 3]
In [7]:
NumpyList = np.array(Pylist)
In [8]:
NumpyList
Out[8]:
In [9]:
type(Pylist) , type(NumpyList)
Out[9]:
In [10]:
# declare the type of list
np.array([1,2,3,4], dtype=float)
Out[10]:
In [11]:
# multidimensional numpy array
np.array([range(i, i+3) for i in [2,4,6]])
Out[11]:
2. Creating arrays from scratch
In [12]:
# create zeros
np.zeros(3, dtype=int)
Out[12]:
In [13]:
# create 3x5 array filled with 1s
np.ones((3,5), dtype=float)
Out[13]:
In [14]:
# create an array of 5s
np.full((2,4), 5, dtype=float)
Out[14]:
In [15]:
#array of linear sequence from 4 to 10
np.arange(4, 10, 2)
Out[15]:
In [16]:
# array of evenly spaced values
np.linspace(0, 1, 5)
Out[16]:
In [17]:
# uniformly distributed random values between 0 and 1
np.random.random((3,4))
Out[17]:
In [18]:
# normally distributed values mean=0, std= 1
np.random.normal(0,1, (2,3))
Out[18]:
In [19]:
# random integer, interval [0, 10)
np.random.randint(0, 10, (2,5))
Out[19]:
In [20]:
# identity matrix
np.eye(3)
Out[20]:
In [21]:
np.random.random_sample((3,4))
Out[21]:
In [22]:
# Create an uninitialized array of three values. No dtype is passed, so NumPy's
# default float type is used (not integers). The contents are whatever already
# happens to exist at that memory location.
np.empty(3)
Out[22]:
a. Attributes of arrays
In [23]:
import numpy as np

np.random.seed(0)  # fixed seed: the same "random" arrays on every run
x1 = np.random.randint(0, 10, size=6)          # 1-D, 6 elements
x2 = np.random.randint(0, 10, size=(3, 4))     # 2-D, 3 x 4
x3 = np.random.randint(0, 10, size=(3, 4, 5))  # 3-D, 3 x 4 x 5
In [24]:
x1
Out[24]:
In [25]:
x2
Out[25]:
In [26]:
x3
Out[26]:
In [27]:
# Inspect the basic attributes of the array x3
print("x3 ndim:", x3.ndim)
print("x3 shape:", x3.shape)
print("x3 size:", x3.size)
print("x3 data:", x3.data)
print("x3 dtype:", x3.dtype)
# NOTE(review): `item` is referenced without being called, so this prints the
# bound method itself rather than an element — confirm that is intended
print("x3 item:", x3.item)
print("x3 itemsize:", x3.itemsize, "bytes")
print("x3 nbytes:", x3.nbytes, "bytes")
b. Array indexing
In [28]:
x1
Out[28]:
In [29]:
x1[4]
Out[29]:
In [30]:
x1[-1]
Out[30]:
In [31]:
x2
Out[31]:
In [32]:
x2[0, 0]
Out[32]:
In [33]:
# colon is for slicing. see the difference above and below.
x2[0:1]
Out[33]:
In [34]:
x2[1,1]
Out[34]:
In [35]:
x2[2,-3]
Out[35]:
In [36]:
# change the values in the array
x2[2,0] = 7
x2[2,1] = 7
In [37]:
x2
Out[37]:
c. Array slicing: x[start:stop:step] (defaults: start=0, stop=size of the dimension, step=1)
In [38]:
x = np.arange(10)
x
Out[38]:
In [39]:
x[:5] #first 5 elements
Out[39]:
In [40]:
x[::2] #every other element
Out[40]:
In [41]:
x[1::2] #every other element starting from 1
Out[41]:
In [42]:
np.arange(1,10, 2) #example of arange method
Out[42]:
In [43]:
x[::-1] #all elements, reversed
Out[43]:
In [44]:
# Multidimensional subarrays
x2
Out[44]:
In [45]:
x2[0] #first row
Out[45]:
In [46]:
x2[2] #third row
Out[46]:
In [47]:
x2[2:] == x2[2]
Out[47]:
In [48]:
x2[1:2, 1:3] #second row intersection between second and third column
Out[48]:
In [49]:
x2[::-1, ::-1] #can be reversed
Out[49]:
In [50]:
x2[:,0] #first column
Out[50]:
In [51]:
# slicing returns a view onto the original array's data, not a copy
x2_sub = x2[:2 , :2]
x2_sub
Out[51]:
In [52]:
x2_sub[0,0] = 61
x2_sub
Out[52]:
In [53]:
# modifying subarray affects the original array as well
x2
Out[53]:
In [54]:
#use copy method to keep the original array not updated
x2_sub_copy = x2[:2, :2].copy()
x2_sub_copy
Out[54]:
In [55]:
x2_sub_copy[0,0] = 99
x2_sub_copy
Out[55]:
In [56]:
x2
Out[56]:
d. Reshaping of arrays
In [57]:
# use reshape method
grid = np.arange(1,10).reshape((3,3))
grid
Out[57]:
In [58]:
# convert one dimensional array into two dimensional row or column matrix
y = np.array([1,2,3])
y
Out[58]:
In [59]:
# row vector via reshape
y.reshape((1,3))
Out[59]:
In [60]:
#row vector via newaxis
y[np.newaxis, :]
Out[60]:
In [61]:
# column vector via reshape
y.reshape((3,1))
Out[61]:
In [62]:
#column vector via newaxis
y[:, np.newaxis]
Out[62]:
e. Array concatenation and splitting
In [63]:
x = np.array([1,2,3])
y = np.array([1,2,3])
z = np.array([61,61,61])
In [64]:
# join the 1-D arrays end to end with np.concatenate
np.concatenate([x,y,z])
Out[64]:
In [65]:
# concat two dimensional arrays
grid
Out[65]:
In [66]:
np.concatenate([grid, grid])
Out[66]:
In [67]:
np.concatenate([grid, grid], axis = 1)
Out[67]:
In [68]:
# vertical stack with vstack
np.vstack([x, grid])
Out[68]:
In [69]:
k = np.array([[99], [99], [99]])
k
Out[69]:
In [70]:
# horizontal stack with hstack
np.hstack([k, grid])
Out[70]:
In [71]:
# splitting of arrays
sp = np.arange(10)
sp
Out[71]:
In [72]:
x1, x2, x3 = np.split(sp, [1, 3])
x1 , x2, x3
Out[72]:
In [73]:
# the integers 0..15 laid out as a 4 x 4 grid
four = np.arange(16).reshape(4, 4)
four
Out[73]:
In [74]:
f1, f2 = np.vsplit(four,[2] )
f1, f2
Out[74]:
In [75]:
import numpy as np
np.random.seed(0)
def compute_reciprocal(values):
    """Return the reciprocal of every element of ``values``, one at a time.

    The explicit Python-level loop is deliberate: this function is the slow
    baseline that the notebook times against the vectorized ``1.0 / values``.

    Parameters
    ----------
    values : sequence of numbers
        Anything that supports ``len()`` and integer indexing.

    Returns
    -------
    numpy.ndarray
        1-D array with ``output[i] == 1.0 / values[i]``.
    """
    output = np.empty(len(values))  # every slot is overwritten by the loop below
    for i in range(len(values)):
        output[i] = 1.0 / values[i]
    return output
In [76]:
values = np.random.randint(1,10, size=5)
compute_reciprocal(values)
Out[76]:
In [77]:
big_array = np.random.randint(1, 100 , size=1000000)
%timeit compute_reciprocal(big_array)
In [78]:
print(compute_reciprocal(values))
print(1.0 / values)
In [79]:
%timeit (1.0 / values)
In [80]:
# array arithmetic: each operator is applied element-wise
x = np.arange(7)
print("x = ", x)
print("x + 2 = ", x +2)
print("x * 2 = ", x * 2)
print("x / 2 = ", x / 2)
print("x //2 =", x // 2) # floor division
In [81]:
# more operations
((x ** 2) + 2 ) * (-x)
Out[81]:
In [82]:
# arithmetic operations implemented in numpy
np.multiply(np.add( np.power(x, 2), 2 ), np.negative(x))
Out[82]:
In [83]:
# absolute value
z = np.array([-5, -2, 0, 1])
abs(z)
Out[83]:
In [84]:
np.absolute(z)
Out[84]:
In [85]:
np.abs(z)
Out[85]:
In [86]:
np.absolute(z) == np.abs(z)
Out[86]:
In [87]:
# trigonometric functions
theta = np.linspace(0, np.pi, 3)
theta
Out[87]:
In [88]:
print(np.sin(theta))
print(np.tan(theta))
In [89]:
print(x)
print(np.sin(x))
print(np.cos(x))
In [90]:
# Exponents and logarithms
print(x)
print(np.power(x, 2))
print(np.power(x, 3))
In [91]:
from scipy import special

# Gamma function and the log of its absolute value, evaluated at each entry of x
x = [1, 5, 10]
print("gamma(x) = ", special.gamma(x))
# label fixed: the absolute-value bars are now balanced
print("ln|gamma(x)| =", special.gammaln(x))
In [92]:
x = np.arange(4)
np.multiply(x, 2, out=x)
x
Out[92]:
In [93]:
# write the result straight into y via `out`; the output array must be able to
# hold the result (here x and y both have 5 elements)
x = np.arange(5)
y = np.empty(5)
np.multiply(x, 2, out=y)
y
Out[93]:
In [94]:
k = np.zeros(10)
np.power(2, x, out=k[::2])
k
Out[94]:
In [95]:
# aggregate
x = np.arange(1, 6)
print(np.add.reduce(x))
print(np.multiply.reduce(x))
In [96]:
np.add.accumulate(x)
Out[96]:
In [97]:
x = np.random.randint(1, 1000, size=10000000)
In [98]:
# summing all values in an array
%timeit sum(x) #python code
%timeit np.sum(x) #numpy code
In [99]:
# min and max
print(min(x), max(x) ) # python code
print(np.min(x) , np.max(x))
In [100]:
# again numpy operates much more quickly
%timeit (min(x), max(x))
%timeit (np.min(x) , np.max(x))
In [101]:
# Multidimensional aggregation
M = np.random.randint(1,5, size=(3,4))
M
Out[101]:
In [102]:
# sum of all values
M.sum()
Out[102]:
In [103]:
# min of each column
np.min(M, axis=0)
Out[103]:
In [104]:
# max of each row (axis=1 collapses the column axis)
np.max(M, axis=1)
Out[104]:
In [105]:
# sum of each row
np.sum(M, axis=1)
Out[105]:
In [106]:
n = [1,3,4]
In [107]:
#product of elements
np.prod(n)
Out[107]:
In [108]:
np.std(n)
Out[108]:
In [109]:
np.min(n), np.max(n)
Out[109]:
In [110]:
# index of min and max
np.argmin(n), np.argmax(n)
Out[110]:
In [111]:
np.median(n), np.mean(n)
Out[111]:
In [112]:
!head -4 data/president_heights.csv
In [113]:
import pandas as pd
data = pd.read_csv('data/president_heights.csv', index_col='order')
data.head()
Out[113]:
In [114]:
data.info()
In [115]:
data.describe()
Out[115]:
In [116]:
heights = np.array(data['height(cm)'])
len(heights)
Out[116]:
In [117]:
print("Mean height:", np.mean(heights))
print("Std of heigth:",np.std(heights, ddof=1))
print('min of height:', np.min(heights))
print('max of heights:', np.max(heights))
print('median of heights:', np.median(heights))
In [118]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
In [119]:
plt.figure(figsize=(10,5) )
plt.hist(heights)
plt.title('Height distribution of US Presidents')
plt.xlabel('height (cm)')
plt.ylabel('number')
plt.show()
In [120]:
a = np.array([0, 1, 2])
b = np.array([5, 5, 5])
a + b
Out[120]:
In [121]:
a + 5
Out[121]:
In [122]:
M = np.ones((3, 3))
M
Out[122]:
In [123]:
M + a
Out[123]:
In [124]:
x = np.arange(3)
y = np.arange(3)[:,np.newaxis]
In [125]:
print(x)
print(y)
In [126]:
x + y
Out[126]:
In [127]:
#Broadcasting example 1
M = np.ones((2,3))
a = np.arange(3)
print("Shape of M:", M.shape)
print(M)
print(' ')
print("shape of a:",a.shape)
print(a)
print('')
print("shape of M + a:", (M+a).shape)
print(M + a)
In [128]:
# Broadcasting example 2
a = np.arange(3).reshape((3,1))
print(a)
print('Shape of a:', a.shape)
print(' ')
b= np.arange(3)
print(b)
print('Shape of b:',b.shape)
print(' ')
print(a + b)
print('Shape of a + b:',(a+b).shape)
In [129]:
# Broadcasting example 3
M = np.ones((3,2))
print(M)
print('Shape of M:', M.shape)
print('')
a = np.arange(3)
print(a)
print('Shape of a:', a.shape)
print('')
print("M + a throws an error. These arrays are incompatible.")
In [130]:
# Broadcasting in practice
X = np.random.randint(0, 5,size=(3, 3))
X
Out[130]:
In [131]:
# mean of each feature (column): axis=0 averages over the rows
Xmean = X.mean(axis=0)
Xmean
Out[131]:
In [132]:
# center the data: subtract the per-column means from every row (broadcasting).
# The operand order matters: Xmean - X would give the negated centered values.
Xcentered = X - Xmean
Xcentered
Out[132]:
In [133]:
Xcentered.mean(0)
Out[133]:
In [134]:
# plotting two dimensional function z = f(x, y)
x = np.linspace(0, 5, 50)
y = np.linspace(0, 5, 50)[:,np.newaxis]
z = np.sin(x) ** 10 + np.cos(10 + y * x) * np.cos(x)
#plot the function
plt.imshow(z, origin='lower', extent=[0,5,0,5], cmap='viridis')
plt.colorbar();
In [135]:
rainfall = pd.read_csv('data/Seattle2014.csv')['PRCP']
inches = rainfall / 254 # presumably PRCP is in tenths of a millimetre (254 tenths of a mm = 1 inch) — verify against the data source
inches.shape # 365 days in a year
Out[135]:
In [136]:
plt.hist(inches, bins=40);
In [137]:
#working with 2-dimensional array
f = np.random.randint(10, size=(3,4))
f
Out[137]:
In [138]:
np.count_nonzero(f < 6)
Out[138]:
In [139]:
# same result as the cell above: when summing a boolean mask, True counts as 1 and False as 0
np.sum(f < 6)
Out[139]:
In [140]:
# how many values less than 3 for each row
np.sum( f < 3, axis = 1)
Out[140]:
In [141]:
# np.any() , np.all()
np.any(f == 0 ) , np.all(f==0)
Out[141]:
In [142]:
np.any(f > 5 , axis=1)
Out[142]:
In [143]:
# go back to the Seattle rainy-days data
# how many days had more than 0.5 inches and less than 1 inch of rain
np.sum((inches > 0.5) & (inches < 1))
Out[143]:
In [144]:
# Other examples
print("Number of days without rain:", np.sum(inches == 0))
print("Number of days with rain:", np.sum(inches != 0))
print("Days with more than 0.5 inches:", np.sum(inches > 0.5) )
# `inches` is already expressed in inches, so the 0.2 threshold means 0.2 inches;
# the label states the same threshold that the mask uses
print("Rainy days with < 0.2 inches:", np.sum((inches < 0.2) & (inches != 0)))
In [145]:
X
Out[145]:
In [146]:
# less than 3 of all values
print(X < 3)
print('Return is one dimensional array')
print(X[X < 3])
In [147]:
lessThan3 = X[X < 3]
lessThan3
Out[147]:
In [148]:
# we can do statisctics on this new array
np.mean(lessThan3), np.min(lessThan3)
Out[148]:
In [149]:
# Back to the Seattle rain data.
# mask of rainy days (any precipitation at all)
rainy = inches > 0
# mask of summer days: June 21st is day 172 of the year, and the mask keeps
# the day indices strictly between 172 and 262
summer = (np.arange(365) - 172 < 90) & (np.arange(365) - 172 > 0)
In [150]:
print("Median precip on rainy days:", np.median(inches[rainy]))
print("Median precip on summer days:", np.median(inches[summer]))
print("Max precip on rainy days:", np.max(inches[rainy]))
print("Max precip on summer days:", np.max(inches[summer]))
print("Median precip on non-summer rainy days:", np.median(inches[rainy & ~summer]))
In [151]:
t = np.arange(10)
t
Out[151]:
In [152]:
t[(t > 3 ) & (t < 6)]
Out[152]:
In [153]:
# a dedicated, seeded generator; draw ten integers from [0, 100)
rand = np.random.RandomState(seed=42)
r = rand.randint(0, 100, size=10)
r
Out[153]:
In [154]:
# instead of using
r[2], r[5], r[7]
Out[154]:
In [155]:
# pass a single list or array of indices
ind= [2, 5, 7]
r[ind]
Out[155]:
In [156]:
ind2 = np.array([[3,7],
[4,5]])
r[ind2]
Out[156]:
In [157]:
# combined indexing
Y = np.arange(12).reshape(3,4)
Y
Out[157]:
In [158]:
# fancy and simple indices
Y[2, [2, 0, 1]]
Out[158]:
In [159]:
# fancy indexing with slicing
Y[1:,[1,2]]
Out[159]:
In [160]:
mean=[0,0]
cov=[[1,2],
[2,5]]
X = rand.multivariate_normal(mean, cov, 100)
X.shape
Out[160]:
In [161]:
X.shape[0]
Out[161]:
In [162]:
X[:5]
Out[162]:
In [163]:
plt.scatter(X[:,0], X[:, 1]);
In [164]:
# select 20 random points
indices = np.random.choice(X.shape[0], 20, replace=False)
indices
Out[164]:
In [165]:
selection = X[indices] #fancy indexing here
selection, selection.shape
Out[165]:
In [166]:
# all points, faded
plt.scatter(X[:,0], X[:,1], alpha=0.3)
# highlight the selected points with large hollow circles: the string 'none'
# means "no fill" (the Python value None only falls back to the default fill),
# and an explicit edge colour keeps the outline visible
plt.scatter(selection[:,0], selection[:,1], s=200, facecolor='none', edgecolor='black');
In [167]:
# modifying values with fancy indexing
x = np.arange(10)
i = np.array([2,1,8,4])
x[i] = 99
x
Out[167]:
In [168]:
x[i] -=10
x
Out[168]:
In [169]:
# to fix the nonintuitive result use at() ufunc
n = np.zeros(10)
i = np.array([2,1,8,4])
np.add.at(n, i, 1)
n
Out[169]:
In [170]:
# Binning data
np.random.seed(42)
x = np.random.randn(100)

# compute a histogram by hand
bins = np.linspace(-5, 5, 20)
counts = np.zeros_like(bins)

# find the appropriate bin for each x
i = np.searchsorted(bins, x)

# add 1 to each of these bins; np.add.at accumulates repeated indices
np.add.at(counts, i, 1)

# plot the result; the step look is a draw style, and 'steps' is not an
# accepted linestyle value in current Matplotlib
plt.plot(bins, counts, drawstyle='steps');
In [171]:
# plt.hist draws the same step histogram from x and the bin edges directly
plt.hist(x, bins, histtype='step');
In [172]:
# matplotlib uses np.histogram function to create this chart.
#lets compare both
print("NumPy routine:")
%timeit counts, edges = np.histogram(x, bins)
print('')
print('Custom routine:')
%timeit np.add.at(counts, np.searchsorted(bins,x), 1)
In [173]:
# np.histogram?? ##for more information
In [174]:
x = np.array([1,7,8,3,9,11])
np.sort(x)
Out[174]:
In [175]:
# if you prefer to sort array in-place, use .sort method
print("x before:", x)
x.sort()
print("x after sorted in place:", x)
In [176]:
# argsort gives the indices that would put the array in sorted order
y = np.array([3, 5, 8, 1, 6])
i = y.argsort()
i
Out[176]:
In [177]:
# those indices can then be used with fancy indexing to build the sorted array
y[i]
Out[177]:
In [178]:
# sorting along rows and columns
rand = np.random.RandomState(42)
X = rand.randint(0, 10, (4,6))
X
Out[178]:
In [179]:
#sort each column of X
np.sort(X, axis=0)
Out[179]:
In [180]:
# sort each row of X
np.sort(X, axis =1)
Out[180]:
In [186]:
# partial sorting
# find the 3 smallest values in the array
x = np.array([11,3,6,70,4,1, 99])
np.partition(x, 3)
Out[186]:
In [192]:
#create random 10 points on a two dimensional array
X = rand.randint(0, 11,(10,2))
X
Out[192]:
In [193]:
plt.scatter(X[:,0], X[:,1], s=100);
In [200]:
X[:, np.newaxis,:] - X[np.newaxis, :, :]
Out[200]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: